# earthengine-api/python/ee/classifier.py
# Kurt Schwehr ff7a5fbdbc ee pyupgrade --py39-plus
# PiperOrigin-RevId: 780619373
# 2025-07-08 10:27:19 -07:00
#
# 603 lines
# 23 KiB
# Python

"""A wrapper for Classifiers."""
from __future__ import annotations
from ee import _arg_types
from ee import apifunction
from ee import computedobject
from ee import confusionmatrix
from ee import dictionary
from ee import ee_list
from ee import ee_string
class Classifier(computedobject.ComputedObject):
  """An object to represent an Earth Engine Classifier.

  The methods of this class forward their arguments to the correspondingly
  named `Classifier.*` API function through `apifunction.ApiFunction.call_`.
  Factory methods (those that do not take `self`) are static methods so that
  they behave the same whether called on the class or on an instance.

  Example:
    # https://developers.google.com/earth-engine/guides/classification
  """

  # True once the API functions have been imported onto this class.
  _initialized: bool = False

  def __init__(self, classifier: computedobject.ComputedObject):
    """Creates a Classifier wrapper.

    Args:
      classifier: A Classifier to cast.

    Raises:
      TypeError: If `classifier` is not a `computedobject.ComputedObject`.
    """
    self.initialize()

    if isinstance(classifier, computedobject.ComputedObject):
      # There is no server-side constructor for ee.Classifier. Pass the object
      # as-is to the server in case it is intended to be a Classifier cast.
      super().__init__(classifier.func, classifier.args, classifier.varName)
      return

    raise TypeError(
        'Classifier can only be used as a cast to Classifier. Found'
        f' {type(classifier)}.'
    )

  @classmethod
  def initialize(cls) -> None:
    """Imports API functions to this class."""
    if not cls._initialized:
      apifunction.ApiFunction.importApi(cls, cls.name(), cls.name())
      cls._initialized = True

  @classmethod
  def reset(cls) -> None:
    """Removes imported API functions from this class."""
    apifunction.ApiFunction.clearApi(cls)
    cls._initialized = False

  @staticmethod
  def name() -> str:
    """Returns the API name of this class, 'Classifier'."""
    return 'Classifier'

  # This is a factory with no `self` parameter, so it must be a static method
  # like the other factories; otherwise calling it on an instance would bind
  # the instance to `categoricalNames`.
  @staticmethod
  def amnhMaxent(
      # pylint: disable=invalid-name
      categoricalNames: _arg_types.List | None = None,
      outputFormat: _arg_types.String | None = None,
      autoFeature: _arg_types.Bool | None = None,
      # pylint: enable=invalid-name
      linear: _arg_types.Bool | None = None,
      quadratic: _arg_types.Bool | None = None,
      product: _arg_types.Bool | None = None,
      threshold: _arg_types.Bool | None = None,
      hinge: _arg_types.Bool | None = None,
      # pylint: disable=invalid-name
      hingeThreshold: _arg_types.Integer | None = None,
      l2lqThreshold: _arg_types.Integer | None = None,
      lq2lqptThreshold: _arg_types.Integer | None = None,
      addSamplesToBackground: _arg_types.Bool | None = None,
      addAllSamplesToBackground: _arg_types.Bool | None = None,
      betaMultiplier: _arg_types.Number | None = None,
      betaHinge: _arg_types.Number | None = None,
      betaLqp: _arg_types.Number | None = None,
      betaCategorical: _arg_types.Number | None = None,
      betaThreshold: _arg_types.Number | None = None,
      # pylint: enable=invalid-name
      extrapolate: _arg_types.Bool | None = None,
      # pylint: disable=invalid-name
      doClamp: _arg_types.Bool | None = None,
      writeClampGrid: _arg_types.Bool | None = None,
      randomTestPoints: _arg_types.Integer | None = None,
      # pylint: enable=invalid-name
      seed: _arg_types.Integer | None = None,
  ) -> Classifier:
    """Returns a Maximum Entropy classifier.

    Maxent is used to model species distribution probabilities using
    environmental data for locations of known presence and for a large number of
    'background' locations. For more information and to cite, see:
    https://biodiversityinformatics.amnh.org/open_source/maxent/ and the
    reference publication: Phillips, et al., 2004. A maximum entropy approach
    to species distribution modeling, Proceedings of the Twenty-First
    International Conference on Machine Learning. The output is a single band
    named 'probability', containing the modeled probability, and an additional
    band named 'clamp' when the 'writeClampGrid' argument is true.

    Args:
      categoricalNames: A list of the names of the categorical inputs. Any
        inputs not listed in this argument are considered to be continuous.
      outputFormat: Representation of probabilities in output.
      autoFeature: Automatically select which feature classes to use, based on
        number of training samples.
      linear: Allow linear features to be used. Ignored when autofeature is
        true.
      quadratic: Allow quadratic features to be used. Ignored when autofeature
        is true.
      product: Allow product features to be used. Ignored when autofeature is
        true.
      threshold: Allow threshold features to be used. Ignored when autofeature
        is true.
      hinge: Allow hinge features to be used. Ignored when autofeature is true.
      hingeThreshold: Number of samples at which hinge features start being
        used. Ignored when autofeature is false.
      l2lqThreshold: Number of samples at which quadratic features start being
        used. Ignored when autofeature is false.
      lq2lqptThreshold: Number of samples at which product and threshold
        features start being used. Ignored when autofeature is false.
      addSamplesToBackground: Add to the background any sample for which has a
        combination of environmental values that isn't already present in the
        background.
      addAllSamplesToBackground: Add all samples to the background, even if they
        have combinations of environmental values that are already present in
        the background.
      betaMultiplier: Regularization multiplier. Multiply all automatic
        regularization parameters by this number. A higher number gives a more
        spread-out distribution.
      betaHinge: Regularization parameter to be applied to all hinge features;
        negative value enables automatic setting.
      betaLqp: Regularization parameter to be applied to all linear, quadratic
        and product features; negative value enables automatic setting.
      betaCategorical: Regularization parameter to be applied to all categorical
        features; negative value enables automatic setting.
      betaThreshold: Regularization parameter to be applied to all threshold
        features; negative value enables automatic setting.
      extrapolate: Extrapolate. Predict to regions of environmental space
        outside the limits encountered during training.
      doClamp: Apply clamping to output.
      writeClampGrid: Adds a band to the output ('clamp') showing the spatial
        distribution of clamping. At each point, the value is the absolute
        difference between prediction values with and without clamping.
      randomTestPoints: Random test percentage. The percentage of training
        points to hold aside as test points, used to compute AUX, omission, etc.
      seed: A seed used when generating random numbers.
    """
    # Arguments are passed positionally, in the same order as the signature.
    return apifunction.ApiFunction.call_(
        'Classifier.amnhMaxent',
        categoricalNames,
        outputFormat,
        autoFeature,
        linear,
        quadratic,
        product,
        threshold,
        hinge,
        hingeThreshold,
        l2lqThreshold,
        lq2lqptThreshold,
        addSamplesToBackground,
        addAllSamplesToBackground,
        betaMultiplier,
        betaHinge,
        betaLqp,
        betaCategorical,
        betaThreshold,
        extrapolate,
        doClamp,
        writeClampGrid,
        randomTestPoints,
        seed,
    )

  def confusionMatrix(self) -> confusionmatrix.ConfusionMatrix:
    """Returns a 2D confusion matrix.

    Computes a 2D confusion matrix for a classifier based on its training data
    (e.g., resubstitution error). Axis 0 of the matrix corresponds to the input
    classes and axis 1 corresponds to the output classes. The rows and columns
    start at class 0 and increase sequentially up to the maximum class value, so
    some rows or columns might be empty if the input classes aren't 0-based or
    sequential.
    """
    # Built from name() like the other instance methods; the resulting string
    # is still 'Classifier.confusionMatrix'.
    return apifunction.ApiFunction.call_(
        self.name() + '.confusionMatrix', self
    )

  @staticmethod
  def decisionTree(
      treeString: _arg_types.String,  # pylint: disable=invalid-name
  ) -> Classifier:
    """Returns a classifier that applies the given decision tree.

    Args:
      treeString: The decision tree, specified in the text format generated by R
        and other similar tools.
    """
    return apifunction.ApiFunction.call_('Classifier.decisionTree', treeString)

  @staticmethod
  def decisionTreeEnsemble(
      treeStrings: _arg_types.List,  # pylint: disable=invalid-name
  ) -> Classifier:
    """Creates a classifier that applies the given decision trees.

    Args:
      treeStrings: The decision trees, specified in the text format generated by
        R and other similar tools. Each item in the list should contain one or
        more trees in text format.

    Returns:
      An ee.Classifier.
    """
    return apifunction.ApiFunction.call_(
        'Classifier.decisionTreeEnsemble', treeStrings
    )

  def explain(self) -> dictionary.Dictionary:
    """Returns a dictionary describing the results of a trained classifier."""
    return apifunction.ApiFunction.call_(self.name() + '.explain', self)

  @staticmethod
  def libsvm(
      # pylint: disable=invalid-name
      decisionProcedure: _arg_types.String | None = None,
      svmType: _arg_types.String | None = None,
      kernelType: _arg_types.String | None = None,
      # pylint: enable=invalid-name
      shrinking: _arg_types.Bool | None = None,
      degree: _arg_types.Integer | None = None,
      gamma: _arg_types.Number | None = None,
      coef0: _arg_types.Number | None = None,
      cost: _arg_types.Number | None = None,
      nu: _arg_types.Number | None = None,
      # pylint: disable=invalid-name
      terminationEpsilon: _arg_types.Number | None = None,
      lossEpsilon: _arg_types.Number | None = None,
      oneClass: _arg_types.Integer | None = None,
      # pylint: enable=invalid-name
  ) -> Classifier:
    """Returns an empty Support Vector Machine classifier.

    Args:
      decisionProcedure: The decision procedure to use for classification.
        Either 'Voting' or 'Margin'. Not used for regression.
      svmType: The SVM type. One of `C_SVC`, `NU_SVC`, `ONE_CLASS`,
        `EPSILON_SVR`, or `NU_SVR`.
      kernelType: The kernel type. One of LINEAR, POLY, RBF, or SIGMOID.
      shrinking: Whether to use shrinking heuristics.
      degree: The degree of polynomial. Valid for POLY kernels.
      gamma: The gamma value in the kernel function. Defaults to the reciprocal
        of the number of features. Valid for POLY, RBF, and SIGMOID kernels.
      coef0: The coef₀ value in the kernel function. Defaults to 0. Valid for
        POLY and SIGMOID kernels.
      cost: The cost (C) parameter. Defaults to 1. Only valid for C-SVC,
        epsilon-SVR, and nu-SVR.
      nu: The nu parameter. Defaults to 0.5. Only valid for nu-SVC, one-class
        SVM, and nu-SVR.
      terminationEpsilon: The termination criterion tolerance (e). Defaults to
        0.001. Only valid for epsilon-SVR.
      lossEpsilon: The epsilon in the loss function (p). Defaults to 0.1. Only
        valid for epsilon-SVR.
      oneClass: The class of the training data on which to train in a one-class
        SVM. Defaults to 0. Only valid for one-class SVM. Possible values are 0
        and 1. The classifier output is binary (0/1) and will match this class
        value for the data determined to be in the class.
    """
    # Arguments are passed positionally, in the same order as the signature.
    return apifunction.ApiFunction.call_(
        'Classifier.libsvm',
        decisionProcedure,
        svmType,
        kernelType,
        shrinking,
        degree,
        gamma,
        coef0,
        cost,
        nu,
        terminationEpsilon,
        lossEpsilon,
        oneClass,
    )

  @staticmethod
  # pylint: disable-next=redefined-builtin
  def load(id: _arg_types.String) -> Classifier:
    """Returns a classifier from an asset.

    Args:
      id: The Classifier's Asset ID.

    Returns:
      An ee.Classifier.
    """
    return apifunction.ApiFunction.call_('Classifier.load', id)

  @staticmethod
  def minimumDistance(
      metric: _arg_types.String | None = None,
      # pylint: disable-next=invalid-name
      kNearest: _arg_types.Integer | None = None,
  ) -> Classifier:
    # pyformat: disable
    """Returns a minimum distance classifier for the given distance metric.

    Creates a minimum distance classifier for the given distance metric. In
    CLASSIFICATION mode, the nearest class is returned. In REGRESSION mode, the
    distance to the nearest class center is returned. In RAW mode, the distance
    to every class center is returned.

    Args:
      metric: The distance metric to use. Options are:
        * 'euclidean' - Euclidean distance from the unnormalized class mean.
        * 'cosine' - spectral angle from the unnormalized class mean.
        * 'mahalanobis' - Mahalanobis distance from the class mean.
        * 'manhattan' - Manhattan distance from the unnormalized class mean.
      kNearest: If greater than 1, the result will contain an array of the k
        nearest neighbors or distances, based on the output mode setting. If
        kNearest is greater than the total number of classes, it will be set
        equal to the number of classes.
    """
    # pyformat: enable
    return apifunction.ApiFunction.call_(
        'Classifier.minimumDistance', metric, kNearest
    )

  def mode(self) -> ee_string.String:
    """Returns the classifier mode string.

    The string will be one of `CLASSIFICATION`, `REGRESSION`, `PROBABILITY`,
    `MULTIPROBABILITY`, `RAW`, or `RAW_REGRESSION`.
    """
    return apifunction.ApiFunction.call_(self.name() + '.mode', self)

  def schema(self) -> ee_list.List:
    """Returns a list of the schema of the classifier.

    Returns the names of the inputs used by this classifier or null if this
    classifier has not had any training data added yet.
    """
    return apifunction.ApiFunction.call_(self.name() + '.schema', self)

  def setOutputMode(self, mode: _arg_types.String) -> Classifier:
    # pyformat: disable
    """Returns a classifier with the given output mode.

    Refer to https://developers.google.com/earth-engine/guides/classification
    for a list of supported modes for each classifier.

    Args:
      mode: The output mode. One of:
        * CLASSIFICATION (default): The output is the class number.
        * REGRESSION: The output is the result of standard regression.
        * PROBABILITY: The output is the probability that the classification is
          correct.
        * MULTIPROBABILITY: The output is an array of probabilities that each
          class is correct ordered by classes seen.
        * RAW: The output is an array of the internal representation of the
          classification process. For example, the raw votes in multi-decision
          tree models.
        * RAW_REGRESSION: The output is an array of the internal representation
          of the regression process. For example, the raw predictions of
          multiple regression trees.
    """
    # pyformat: enable
    return apifunction.ApiFunction.call_(
        self.name() + '.setOutputMode', self, mode
    )

  @staticmethod
  def smileCart(
      # pylint: disable=invalid-name
      maxNodes: _arg_types.Integer | None = None,
      minLeafPopulation: _arg_types.Integer | None = None,
      # pylint: enable=invalid-name
  ) -> Classifier:
    """Returns an empty CART classifier.

    See: Classification and Regression Trees, L. Breiman, J. Friedman, R.
    Olshen, C. Stone Chapman and Hall, 1984.

    Args:
      maxNodes: The maximum number of leaf nodes in each tree. If unspecified,
        defaults to no limit.
      minLeafPopulation: Only create nodes whose training set contains at least
        this many points.
    """
    return apifunction.ApiFunction.call_(
        'Classifier.smileCart', maxNodes, minLeafPopulation
    )

  @staticmethod
  def smileGradientTreeBoost(
      numberOfTrees: _arg_types.Integer,  # pylint: disable=invalid-name
      shrinkage: _arg_types.Number | None = None,
      # pylint: disable=invalid-name
      samplingRate: _arg_types.Number | None = None,
      maxNodes: _arg_types.Integer | None = None,
      # pylint: enable=invalid-name
      loss: _arg_types.String | None = None,
      seed: _arg_types.Integer | None = None,
  ) -> Classifier:
    """Returns an empty Gradient Tree Boost classifier.

    Args:
      numberOfTrees: The number of decision trees to create.
      shrinkage: The shrinkage parameter in (0, 1] controls the learning rate of
        procedure.
      samplingRate: The sampling rate for stochastic tree boosting.
      maxNodes: The maximum number of leaf nodes in each tree. If unspecified,
        defaults to no limit.
      loss: Loss function for regression. One of: LeastSquares,
        LeastAbsoluteDeviation, Huber.
      seed: The randomization seed.
    """
    return apifunction.ApiFunction.call_(
        'Classifier.smileGradientTreeBoost',
        numberOfTrees,
        shrinkage,
        samplingRate,
        maxNodes,
        loss,
        seed,
    )

  @staticmethod
  def smileKNN(
      k: _arg_types.Integer | None = None,
      # pylint: disable-next=invalid-name
      searchMethod: _arg_types.String | None = None,
      metric: _arg_types.String | None = None,
  ) -> Classifier:
    # pyformat: disable
    """Returns an empty k-NN classifier.

    The k-nearest neighbor algorithm (k-NN) is a method for classifying objects
    by a majority vote of its neighbors, with the object being assigned to the
    class most common amongst its k nearest neighbors (k is a positive integer,
    typically small, typically odd).

    Args:
      k: The number of neighbors for classification.
      searchMethod: Search method. The following are valid: [AUTO,
        LINEAR_SEARCH, KD_TREE, COVER_TREE]. AUTO will choose between KD_TREE
        and COVER_TREE depending on the dimension count. Results may vary
        between the different search methods for distance ties and probability
        values. Since performance and results may vary consult with SMILE's
        documentation and other literature.
      metric: The distance metric to use. NOTE: KD_TREE (and AUTO for low
        dimensions) will not use the metric selected. Options are:
        * 'EUCLIDEAN' - Euclidean distance.
        * 'MAHALANOBIS' - Mahalanobis distance.
        * 'MANHATTAN' - Manhattan distance.
        * 'BRAYCURTIS' - Bray-Curtis distance.
    """
    # pyformat: enable
    return apifunction.ApiFunction.call_(
        'Classifier.smileKNN', k, searchMethod, metric
    )

  @staticmethod
  def smileNaiveBayes(
      lambda_: _arg_types.Number | None = None, **kwargs
  ) -> Classifier:
    # pylint: disable=g-doc-args
    """Returns an empty Naive Bayes classifier.

    This classifier assumes that the feature vector consists of positive
    integers, and negative inputs are discarded.

    Because `lambda` is a reserved word in Python, the value can be given
    either as `lambda_` or through keyword expansion, e.g.
    `smileNaiveBayes(**{'lambda': 0.5})`. When both are supplied, the `lambda`
    keyword value is the one that is used.

    Args:
      lambda: A smoothing lambda. Used to avoid assigning zero probability to
        classes not seen during training, instead using lambda / (lambda *
        nFeatures).

    Raises:
      ValueError: If any keyword argument other than `lambda` is passed.
    """
    # pylint: enable=g-doc-args
    if kwargs:
      # `lambda` is the only extra keyword accepted.
      if kwargs.keys() != {'lambda'}:
        raise ValueError(
            f'Unexpected arguments: {list(kwargs.keys())}. Expected: lambda.'
        )
      lambda_ = kwargs['lambda']

    return apifunction.ApiFunction.call_('Classifier.smileNaiveBayes', lambda_)

  @staticmethod
  def smileRandomForest(
      # pylint: disable=invalid-name
      numberOfTrees: _arg_types.Integer,
      variablesPerSplit: _arg_types.Integer | None = None,
      minLeafPopulation: _arg_types.Integer | None = None,
      bagFraction: _arg_types.Number | None = None,
      maxNodes: _arg_types.Integer | None = None,
      # pylint: enable=invalid-name
      seed: _arg_types.Integer | None = None,
  ) -> Classifier:
    """Returns an empty Random Forest classifier.

    Args:
      numberOfTrees: The number of decision trees to create.
      variablesPerSplit: The number of variables per split. If unspecified, uses
        the square root of the number of variables.
      minLeafPopulation: Only create nodes whose training set contains at least
        this many points.
      bagFraction: The fraction of input to bag per tree.
      maxNodes: The maximum number of leaf nodes in each tree. If unspecified,
        defaults to no limit.
      seed: The randomization seed.
    """
    return apifunction.ApiFunction.call_(
        'Classifier.smileRandomForest',
        numberOfTrees,
        variablesPerSplit,
        minLeafPopulation,
        bagFraction,
        maxNodes,
        seed,
    )

  @staticmethod
  def spectralRegion(
      coordinates: _arg_types.List, schema: _arg_types.List | None = None
  ) -> Classifier:
    """Returns a spectral region classifier.

    Creates a classifier that tests if its inputs lie within a polygon defined
    by a set of coordinates in an arbitrary 2D coordinate system. Each input to
    be classified must have 2 values (e.g., images must have 2 bands). The
    result will be 1 wherever the input values are contained within the given
    polygon and 0 otherwise.

    Args:
      coordinates: The coordinates of the polygon, as a list of rings. Each ring
        is a list of coordinate pairs (e.g., [u1, v1, u2, v2, ..., uN, vN]). No
        edge may intersect any other edge. The resulting classification will be
        a 1 wherever the input values are within the interior of the given
        polygon, that is, an odd number of polygon edges must be crossed to get
        outside the polygon and 0 otherwise.
      schema: The classifier's schema. A list of band or property names that the
        classifier will operate on. Since this classifier doesn't undergo a
        training step, these have to be specified manually. Defaults to ['u',
        'v'].
    """
    return apifunction.ApiFunction.call_(
        'Classifier.spectralRegion', coordinates, schema
    )

  def train(
      # classifier: _ClassifierType,
      self,
      features: _arg_types.FeatureCollection,
      # pylint: disable=invalid-name
      classProperty: _arg_types.String,
      inputProperties: _arg_types.List | None = None,
      # pylint: enable=invalid-name
      subsampling: _arg_types.Number | None = None,
      # pylint: disable-next=invalid-name
      subsamplingSeed: _arg_types.Integer | None = None,
  ) -> Classifier:
    """Returns a trained classifier.

    Trains the classifier on a collection of features, using the specified
    numeric properties of each feature as training data. The geometry of the
    features is ignored.

    Args:
      features: The collection to train on.
      classProperty: The name of the property containing the class value. Each
        feature must have this property and its value must be numeric.
      inputProperties: The list of property names to include as training data.
        Each feature must have all these properties and their values must be
        numeric. This argument is optional if the input collection contains a
        'band_order' property, (as produced by Image.sample).
      subsampling: An optional subsampling factor, within (0, 1].
      subsamplingSeed: A randomization seed to use for subsampling.
    """
    return apifunction.ApiFunction.call_(
        self.name() + '.train',
        self,
        features,
        classProperty,
        inputProperties,
        subsampling,
        subsamplingSeed,
    )