mirror of
https://github.com/google/earthengine-api.git
synced 2025-12-08 19:26:12 +00:00
599 lines
22 KiB
Python
599 lines
22 KiB
Python
#!/usr/bin/env python3
|
|
"""A serializer that encodes EE object trees as JSON DAGs."""
|
|
|
|
import collections
|
|
import datetime
|
|
import hashlib
|
|
import json
|
|
import numbers
|
|
from typing import Any, Dict, List, Optional, Set
|
|
|
|
from ee import _cloud_api_utils
|
|
from ee import ee_exception
|
|
from ee import encodable
|
|
|
|
# The datetime for the beginning of the Unix epoch.
|
|
_EPOCH_DATETIME = datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
|
|
|
|
# Don't generate very deep expressions, as the backend rejects them.
|
|
# The backend's limit is 100, and we want to stay well away from that
|
|
# as a few extra levels of wrapping are always added.
|
|
_DEPTH_LIMIT = 50
|
|
|
|
|
|
# pylint: disable-next=g-bad-name
|
|
def DatetimeToMicroseconds(date: datetime.datetime) -> int:
|
|
"""Convert a datetime to a timestamp, microseconds since the epoch."""
|
|
if date.tzinfo is None:
|
|
# Assume that the time is in utc.
|
|
date = date.replace(tzinfo=datetime.timezone.utc)
|
|
td = (date - _EPOCH_DATETIME)
|
|
return td.microseconds + (td.seconds + td.days * 24 * 3600) * 1000000
|
|
|
|
|
|
class Serializer:
|
|
"""A serializer for EE object trees."""
|
|
unbound_name: Optional[str]
|
|
|
|
# Whether the encoding should factor out shared subtrees.
|
|
_is_compound: bool
|
|
_for_cloud_api: bool
|
|
# A list of shared subtrees as [name, value] pairs.
|
|
_scope: List[str]
|
|
# A lookup table from object hash to subtree names as stored in self._scope
|
|
_encoded: Dict[Any, Any]
|
|
# A lookup table from object ID as retrieved by id() to md5 hash values.
|
|
_hashcache: Dict[Any, Any]
|
|
|
|
def __init__(
|
|
self,
|
|
is_compound: bool = True,
|
|
for_cloud_api: bool = False,
|
|
unbound_name: Optional[str] = None,
|
|
):
|
|
"""Constructs a serializer.
|
|
|
|
Args:
|
|
is_compound: Whether the encoding should factor out shared subtrees.
|
|
for_cloud_api: Whether the encoding should be done for the Cloud API or
|
|
the legacy API.
|
|
unbound_name: Provides a name for unbound variables in objects.
|
|
"""
|
|
self.unbound_name = unbound_name
|
|
|
|
self._is_compound = is_compound
|
|
self._for_cloud_api = for_cloud_api
|
|
self._scope = []
|
|
self._encoded = {}
|
|
self._hashcache = {}
|
|
|
|
def _encode(self, obj: Any) -> Any:
|
|
"""Encodes a top level object to be executed server-side.
|
|
|
|
Args:
|
|
obj: The object to encode.
|
|
|
|
Returns:
|
|
An encoded object ready for JSON serialization.
|
|
"""
|
|
if self._for_cloud_api:
|
|
return self._encode_for_cloud_api(obj)
|
|
value = self._encode_value(obj)
|
|
if self._is_compound:
|
|
if (isinstance(value, dict) and value['type'] == 'ValueRef' and
|
|
len(self._scope) == 1):
|
|
# Just one value. No need for complex structure.
|
|
value = self._scope[0][1]
|
|
else:
|
|
# Wrap the scopes and final value with a CompoundValue.
|
|
value = {'type': 'CompoundValue', 'scope': self._scope, 'value': value}
|
|
# Clear state in case of future encoding.
|
|
self._scope = []
|
|
self._encoded = {}
|
|
self._hashcache = {}
|
|
return value
|
|
|
|
def _encode_for_cloud_api(self, obj: Any) -> Any:
|
|
"""Encodes an object as an Expression or quasi-Expression."""
|
|
value = self._encode_cloud_object(obj)
|
|
if self._is_compound:
|
|
# Wrap the scopes and final value into an Expression.
|
|
value = _ExpressionOptimizer(value, self._scope).optimize()
|
|
# Clear state in case of future encoding.
|
|
self._scope = []
|
|
self._encoded = {}
|
|
self._hashcache = {}
|
|
else:
|
|
value = _ExpressionOptimizer(value).optimize()
|
|
return value
|
|
|
|
def _encode_value(self, obj: Any) -> Any:
|
|
"""Encodes a subtree as a Value in the EE API v2 (DAG) format.
|
|
|
|
If _is_compound is True, this will fill the _scope and _encoded properties.
|
|
|
|
Args:
|
|
obj: The object to encode.
|
|
|
|
Returns:
|
|
An encoded object.
|
|
"""
|
|
obj_id = id(obj)
|
|
hashval = self._hashcache.get(obj_id)
|
|
encoded = self._encoded.get(hashval, None)
|
|
if self._is_compound and encoded:
|
|
# Already encoded objects are encoded as ValueRefs and returned directly.
|
|
return {'type': 'ValueRef', 'value': encoded}
|
|
elif obj is None or isinstance(obj, (bool, numbers.Number, str)):
|
|
# Primitives are encoded as is and not saved in the scope.
|
|
return obj
|
|
elif isinstance(obj, datetime.datetime):
|
|
# A raw date slipped through. Wrap it. Calling ee.Date from here would
|
|
# cause a circular dependency, so we encode it manually.
|
|
return {
|
|
'type': 'Invocation',
|
|
'functionName': 'Date',
|
|
'arguments': {
|
|
'value': DatetimeToMicroseconds(obj) / 1e3
|
|
}
|
|
}
|
|
elif isinstance(obj, encodable.Encodable):
|
|
# Some objects know how to encode themselves.
|
|
result = obj.encode(self._encode_value)
|
|
if (not isinstance(result, (list, tuple)) and
|
|
(not isinstance(result, (dict)) or result['type'] == 'ArgumentRef')):
|
|
# Optimization: simple enough that adding it to the scope is probably
|
|
# not worth it.
|
|
return result
|
|
elif isinstance(obj, encodable.EncodableFunction):
|
|
result = obj.encode_invocation(self._encode_value)
|
|
if (not isinstance(result, (list, tuple)) and
|
|
(not isinstance(result, (dict)) or result['type'] == 'ArgumentRef')):
|
|
# Optimization: simple enough that adding it to the scope is probably
|
|
# not worth it.
|
|
return result
|
|
elif isinstance(obj, (list, tuple)):
|
|
# Lists are encoded recursively.
|
|
result = [self._encode_value(i) for i in obj]
|
|
elif isinstance(obj, dict):
|
|
# Dictionary are encoded recursively and wrapped in a type specifier.
|
|
result = {
|
|
'type':
|
|
'Dictionary',
|
|
'value':
|
|
dict([(key, self._encode_value(value))
|
|
for key, value in obj.items()])
|
|
}
|
|
else:
|
|
raise ee_exception.EEException('Cannot encode object: %s' % obj)
|
|
|
|
if self._is_compound:
|
|
# Save the new object and return a ValueRef.
|
|
hashval = hashlib.md5(json.dumps(result).encode()).digest()
|
|
self._hashcache[obj_id] = hashval
|
|
name = self._encoded.get(hashval, None)
|
|
if not name:
|
|
name = str(len(self._scope))
|
|
self._scope.append((name, result))
|
|
self._encoded[hashval] = name
|
|
return {'type': 'ValueRef', 'value': name}
|
|
else:
|
|
return result
|
|
|
|
def _encode_cloud_object(self, obj: Any) -> Any:
|
|
"""Encodes an object using the Cloud API Expression form.
|
|
|
|
If _is_compound is True, this will fill the _scope and _encoded properties.
|
|
|
|
Args:
|
|
obj: The object to encode.
|
|
|
|
Returns:
|
|
If _is_compound is True, a string that is the key under which the
|
|
encoded object is stored in _scope.
|
|
If _is_compound is False, the encoded object as a single quasi-Expression.
|
|
"""
|
|
obj_id = id(obj)
|
|
hashval = self._hashcache.get(obj_id)
|
|
reference = self._encoded.get(hashval, None)
|
|
if reference:
|
|
return reference
|
|
elif obj is None or isinstance(obj, (bool, str)):
|
|
result = {'constantValue': obj}
|
|
elif isinstance(obj, numbers.Number):
|
|
result = _cloud_api_utils.encode_number_as_cloud_value(obj)
|
|
elif isinstance(obj, datetime.datetime):
|
|
# A raw date slipped through. Wrap it. Calling ee.Date from here would
|
|
# cause a circular dependency, so we encode it manually.
|
|
result = {
|
|
'functionInvocationValue': {
|
|
'functionName': 'Date',
|
|
'arguments': {
|
|
'value': {
|
|
'constantValue': DatetimeToMicroseconds(obj) / 1e3
|
|
}
|
|
}
|
|
}
|
|
}
|
|
elif isinstance(obj, encodable.Encodable):
|
|
# Some objects know how to encode themselves.
|
|
result = obj.encode_cloud_value(self._encode_cloud_object)
|
|
elif isinstance(obj, (list, tuple)):
|
|
# Lists are encoded recursively.
|
|
if self._is_compound:
|
|
result = {
|
|
'arrayValue': {
|
|
'values': [{
|
|
'valueReference': self._encode_cloud_object(i)
|
|
} for i in obj]
|
|
}
|
|
}
|
|
else:
|
|
result = {
|
|
'arrayValue': {
|
|
'values': [self._encode_cloud_object(i) for i in obj]
|
|
}
|
|
}
|
|
elif isinstance(obj, dict):
|
|
# Dictionary are encoded recursively and wrapped in a type specifier.
|
|
# We iterate through the entries in a deterministic order, not because it
|
|
# affects the order of the entries in the output result, but because it
|
|
# affects the names that they are assigned in _scope; without the
|
|
# ordering, the encoding process may produce one of multiple different
|
|
# (albeit equivalent) representations.
|
|
if self._is_compound:
|
|
result = {
|
|
'dictionaryValue': {
|
|
'values': {
|
|
key: {
|
|
'valueReference': self._encode_cloud_object(obj[key])
|
|
} for key in sorted(obj)
|
|
}
|
|
}
|
|
}
|
|
else:
|
|
result = {
|
|
'dictionaryValue': {
|
|
'values': {
|
|
key: self._encode_cloud_object(obj[key])
|
|
for key in sorted(obj)
|
|
}
|
|
}
|
|
}
|
|
else:
|
|
raise ee_exception.EEException('Cannot encode object: %s' % obj)
|
|
|
|
if self._is_compound:
|
|
# Save the new object and return a ValueRef.
|
|
hashval = hashlib.md5(json.dumps(result).encode()).digest()
|
|
self._hashcache[obj_id] = hashval
|
|
name = self._encoded.get(hashval, None)
|
|
if not name:
|
|
name = str(len(self._scope))
|
|
self._scope.append((name, result))
|
|
self._encoded[hashval] = name
|
|
return name
|
|
else:
|
|
return result
|
|
|
|
|
|
def encode(
|
|
obj: Any,
|
|
is_compound: bool = True,
|
|
for_cloud_api: bool = True,
|
|
unbound_name: Optional[str] = None,
|
|
) -> Any:
|
|
"""Serialize an object to a JSON-compatible structure for API calls.
|
|
|
|
Args:
|
|
obj: The object to serialize.
|
|
is_compound: Whether the encoding should factor out shared subtrees.
|
|
for_cloud_api: Whether the encoding should be done for the Cloud API or the
|
|
legacy API.
|
|
unbound_name: Provides a name for unbound variables in objects. Unbound
|
|
variables are otherwise disallowed. See the Count Functions usage in
|
|
customfunction.py.
|
|
|
|
Returns:
|
|
A JSON-compatible structure representing the input.
|
|
"""
|
|
serializer = Serializer(
|
|
is_compound, for_cloud_api=for_cloud_api, unbound_name=unbound_name)
|
|
return serializer._encode(obj) # pylint: disable=protected-access
|
|
|
|
|
|
# pylint: disable-next=g-bad-name
|
|
def toJSON(obj, opt_pretty: bool = False, for_cloud_api: bool = True) -> Any:
|
|
"""Serialize an object to a JSON string appropriate for API calls.
|
|
|
|
Args:
|
|
obj: The object to serialize.
|
|
opt_pretty: True to pretty-print the object.
|
|
for_cloud_api: Whether the encoding should be done for the Cloud API or the
|
|
legacy API.
|
|
|
|
Returns:
|
|
A JSON string representing the input.
|
|
"""
|
|
serializer = Serializer(not opt_pretty, for_cloud_api=for_cloud_api)
|
|
encoded = serializer._encode(obj) # pylint: disable=protected-access
|
|
return json.dumps(encoded, indent=2 if opt_pretty else None)
|
|
|
|
|
|
# pylint: disable-next=g-bad-name
|
|
def toReadableJSON(obj: Any, for_cloud_api: bool = True) -> Any:
|
|
"""Convert an object to readable JSON."""
|
|
return toJSON(obj, True, for_cloud_api=for_cloud_api)
|
|
|
|
|
|
class _ExpressionOptimizer:
|
|
"""Optimises the representation of an Expression.
|
|
|
|
The Expressions generated by recursive encoding can be inefficiently
|
|
represented. This class helps improve the representation.
|
|
|
|
The initial representation is intentionally simple, as it makes the encoding
|
|
logic simple. Constants end up as individual ValueNodes, though the Expression
|
|
format itself allows complex constants (nested arrays and/or dicts containing
|
|
constant values). There are also often places where references to ValueNodes
|
|
can be replaced by direct inclusion of those ValueNodes.
|
|
|
|
This operates in two modes:
|
|
- It can be passed an Expression as a dict of named ValueNodes, and the name
|
|
that represents the final result. In this case, it returns the optimised
|
|
Expression in the same form. This is the "compound" mode.
|
|
- It can be passed a quasi-Expression as a single object. In this case, it
|
|
returns the optimised quasi-Expression in the same form. This is the
|
|
"non-compound" mode. A "quasi-Expression" is essentially an Expression DAG
|
|
that's been expanded to a tree by replacing references with the actual thing
|
|
being referenced. This means that if the same entity is referenced more than
|
|
once, it will be duplicated in the tree.
|
|
|
|
The rules that the optimiser follows are straightforward:
|
|
- If a value is referred to only once, lift it into the place that references
|
|
it.
|
|
- If a value is a numeric or boolean constant, lift it into all the places
|
|
that reference it.
|
|
- If a value is a string constant, lift it if it is referred to only once.
|
|
- Collapse dicts and arrays of constants to constant dicts/arrays.
|
|
"""
|
|
|
|
def __init__(self, result: Any, values: Optional[Any] = None):
|
|
"""Builds an ExpressionOptimizer.
|
|
|
|
Args:
|
|
result: The result to optimize, either as a key of "values", or as a
|
|
quasi-Expression.
|
|
values: If provided (in compound mode), a set of named ValueNodes.
|
|
"""
|
|
self._result = result
|
|
# Convert sequence of tuples to a dict allowing lookup.
|
|
# Values will be of the form:
|
|
# [('0', {'constantValue': 99}), ('1', {'constantValue': 98}), ...]
|
|
self._values = dict(values) if values is not None else None
|
|
|
|
if self._is_compound():
|
|
self._single_uses = self._find_single_uses()
|
|
self._optimized_values = {}
|
|
self._reference_map = {}
|
|
|
|
def _is_compound(self) -> bool:
|
|
return self._values is not None
|
|
|
|
def _find_single_uses(self) -> Set[Any]:
|
|
"""Finds the names of all named values that are referred to only once."""
|
|
reference_counts = collections.defaultdict(int)
|
|
reference_counts[self._result] += 1
|
|
|
|
def _contained_reference(value: Any) -> Optional[Any]:
|
|
"""Gets a contained reference from a ValueNode, if there is one."""
|
|
if 'functionDefinitionValue' in value:
|
|
return value['functionDefinitionValue']['body']
|
|
elif 'functionInvocationValue' in value:
|
|
function_invocation = value['functionInvocationValue']
|
|
if 'functionReference' in function_invocation:
|
|
return function_invocation['functionReference']
|
|
elif 'valueReference' in value:
|
|
return value['valueReference']
|
|
return None
|
|
|
|
def increment_reference_count(value: Any) -> None:
|
|
reference = _contained_reference(value)
|
|
if reference is not None:
|
|
reference_counts[reference] += 1
|
|
|
|
self._visit_all_values_in_expression(increment_reference_count)
|
|
return set(reference for reference, count in reference_counts.items()
|
|
if count == 1)
|
|
|
|
def optimize(self) -> Any:
|
|
"""Optimises the expression, returning the optimised form."""
|
|
optimized_result = self._optimize_referred_value(self._result)
|
|
if self._is_compound():
|
|
return {'result': optimized_result, 'values': self._optimized_values}
|
|
else:
|
|
return optimized_result
|
|
|
|
def _optimize_referred_value(self, reference_or_value: Any) -> Any:
|
|
"""Recursively optimises a value.
|
|
|
|
Optimises a value and everything recursively reachable from it.
|
|
|
|
This operates differently depending on the mode.
|
|
In compound mode:
|
|
Takes a name (in _values) for a ValueNode, optimises the referenced
|
|
ValueNode, and returns a name (in _optimized_values) for the optimised
|
|
ValueNode. Updates _optimized_values and _reference_map.
|
|
In non-compound mode:
|
|
Takes a quasi-ValueNode, optimises it, and returns the optimised
|
|
quasi-ValueNode.
|
|
|
|
Args:
|
|
reference_or_value: The name in _values of the value to optimise, or the
|
|
actual value itself.
|
|
|
|
Returns:
|
|
The name, in _optimized_values, of the optimised value, or the optimised
|
|
value itself.
|
|
"""
|
|
if self._is_compound():
|
|
if reference_or_value in self._reference_map:
|
|
return self._reference_map[reference_or_value]
|
|
mapped_reference = str(len(self._reference_map))
|
|
self._reference_map[reference_or_value] = mapped_reference
|
|
self._optimized_values[mapped_reference] = self._optimize_value(
|
|
self._values[reference_or_value], 0)
|
|
return mapped_reference
|
|
else:
|
|
return self._optimize_value(reference_or_value, 0)
|
|
|
|
def _optimize_value(self, value: Any, depth: int) -> Any:
|
|
"""Optimises a single value.
|
|
|
|
Args:
|
|
value: The ValueNode to optimise, in dict form.
|
|
depth: How deep in the encoded output this value will be placed.
|
|
|
|
Returns:
|
|
An optimised version of that value, created by lifting in all feasible
|
|
constants and references, subject (in compound mode) to a depth limit.
|
|
"""
|
|
if any(
|
|
x in value for x in
|
|
['constantValue', 'integerValue', 'bytesValue', 'argumentReference']):
|
|
# Not optimisable.
|
|
return value
|
|
elif 'arrayValue' in value:
|
|
# Optimise recursively, then turn an array of constants into a constant
|
|
# array.
|
|
optimized_array = [
|
|
self._optimize_value(array_value, depth + 3)
|
|
for array_value in value['arrayValue']['values']
|
|
]
|
|
if all(self._is_constant_value(v) for v in optimized_array):
|
|
optimized_array = [v['constantValue'] for v in optimized_array]
|
|
return {'constantValue': optimized_array}
|
|
else:
|
|
return {'arrayValue': {'values': optimized_array}}
|
|
elif 'dictionaryValue' in value:
|
|
# Optimise recursively, then turn a dict of constants into a constant
|
|
# dict.
|
|
optimized_dict = {
|
|
key: self._optimize_value(dict_value, depth + 3)
|
|
for key, dict_value in value['dictionaryValue']['values'].items()
|
|
}
|
|
if all(self._is_constant_value(v) for v in optimized_dict.values()):
|
|
optimized_dict = {
|
|
k: v['constantValue'] for k, v in optimized_dict.items()
|
|
}
|
|
return {'constantValue': optimized_dict}
|
|
else:
|
|
return {'dictionaryValue': {'values': optimized_dict}}
|
|
elif 'functionDefinitionValue' in value:
|
|
function_definition = value['functionDefinitionValue']
|
|
return {
|
|
'functionDefinitionValue': {
|
|
'argumentNames': function_definition['argumentNames'],
|
|
'body': self._optimize_referred_value(function_definition['body'])
|
|
}
|
|
}
|
|
elif 'functionInvocationValue' in value:
|
|
function_invocation = value['functionInvocationValue']
|
|
arguments = function_invocation['arguments']
|
|
optimized_invocation = {}
|
|
if 'functionName' in function_invocation:
|
|
optimized_invocation['functionName'] = function_invocation[
|
|
'functionName']
|
|
else:
|
|
optimized_invocation[
|
|
'functionReference'] = self._optimize_referred_value(
|
|
function_invocation['functionReference'])
|
|
optimized_invocation['arguments'] = {
|
|
k: self._optimize_value(v, depth + 3) for k, v in arguments.items()
|
|
}
|
|
return {'functionInvocationValue': optimized_invocation}
|
|
elif 'valueReference' in value:
|
|
# Lift if possible: anything used only here, anything lightweight.
|
|
reference = value['valueReference']
|
|
if not self._is_compound():
|
|
return self._optimize_value(reference, depth)
|
|
|
|
referenced_value = self._values[reference]
|
|
if reference in self._single_uses and depth < _DEPTH_LIMIT:
|
|
return self._optimize_value(referenced_value, depth)
|
|
else:
|
|
if self._is_always_liftable(referenced_value):
|
|
return referenced_value
|
|
return {'valueReference': self._optimize_referred_value(reference)}
|
|
|
|
def _is_always_liftable(self, value: Any) -> bool:
|
|
"""Determines if a value is simple enough to lift unconditionally."""
|
|
# Non-string constants and argument references are simple enough.
|
|
if 'constantValue' in value:
|
|
return self._is_liftable_constant(value['constantValue'])
|
|
else:
|
|
return 'argumentReference' in value
|
|
|
|
def _is_liftable_constant(self, value: Any) -> bool:
|
|
"""Whether a constant is simple enough to lift to where it's referenced."""
|
|
return value is None or isinstance(value, (bool, numbers.Number))
|
|
|
|
def _is_constant_value(self, value: Any) -> bool:
|
|
"""Whether a ValueNode (as a dict) is a constant."""
|
|
return 'constantValue' in value
|
|
|
|
def _visit_all_values_in_expression(self, visitor: Any) -> None:
|
|
"""Calls visitor on all ValueNodes in the expression.
|
|
|
|
Args:
|
|
visitor: A callable that will be invoked once at every ValueNode in the
|
|
expression, including nested ValueNodes.
|
|
"""
|
|
self._visit_all_values(self._result, self._values[self._result], set(),
|
|
visitor)
|
|
|
|
def _visit_all_values(
|
|
self, reference: Any, value: Any, visited: Any, visitor: Any
|
|
) -> None:
|
|
"""Calls visitor on a ValueNode and its descendants.
|
|
|
|
Args:
|
|
reference: A reference to the ValueNode, or None.
|
|
value: The ValueNode, in dict form.
|
|
visited: A set of references for which the visitor has already been
|
|
invoked.
|
|
visitor: The callable to invoke.
|
|
"""
|
|
if reference is not None:
|
|
if reference in visited:
|
|
return
|
|
visited.add(reference)
|
|
visitor(value)
|
|
|
|
if 'arrayValue' in value:
|
|
for v in value['arrayValue']['values']:
|
|
self._visit_all_values(None, v, visited, visitor)
|
|
elif 'dictionaryValue' in value:
|
|
d = value['dictionaryValue']['values']
|
|
for k in sorted(d):
|
|
self._visit_all_values(None, d[k], visited, visitor)
|
|
elif 'functionDefinitionValue' in value:
|
|
definition_reference = value['functionDefinitionValue']['body']
|
|
self._visit_all_values(definition_reference,
|
|
self._values[definition_reference], visited,
|
|
visitor)
|
|
elif 'functionInvocationValue' in value:
|
|
function_invocation = value['functionInvocationValue']
|
|
if 'functionReference' in function_invocation:
|
|
function_reference = function_invocation['functionReference']
|
|
self._visit_all_values(function_reference,
|
|
self._values[function_reference], visited,
|
|
visitor)
|
|
arguments = function_invocation['arguments']
|
|
for k in sorted(arguments):
|
|
self._visit_all_values(None, arguments[k], visited, visitor)
|
|
elif 'valueReference' in value:
|
|
value_reference = value['valueReference']
|
|
self._visit_all_values(value_reference, self._values[value_reference],
|
|
visited, visitor)
|