mirror of
https://github.com/meteoinfo/MeteoInfo.git
synced 2025-12-08 20:36:05 +00:00
452 lines
14 KiB
Python
452 lines
14 KiB
Python
from mipylib import numeric as np
|
|
from ..core._ndarray import NDArray
|
|
from org.meteoinfo.ndarray.math import ArrayUtil
|
|
|
|
nomask = False
|
|
|
|
__all__ = ['MaskedArray','masked_array','masked_invalid','getdata','getmask','is_masked']
|
|
|
|
class MaskedArray(NDArray):
|
|
"""
|
|
An array class with possibly masked values.
|
|
Masked values of True exclude the corresponding element from any
|
|
computation.
|
|
Construction::
|
|
x = MaskedArray(data, mask=nomask, dtype=None, copy=False, subok=True,
|
|
ndmin=0, fill_value=None, keep_mask=True, hard_mask=None,
|
|
shrink=True, order=None)
|
|
Parameters
|
|
----------
|
|
data : array_like
|
|
Input data.
|
|
mask : sequence, optional
|
|
Mask. Must be convertible to an array of booleans with the same
|
|
shape as `data`. True indicates a masked (i.e. invalid) data.
|
|
dtype : dtype, optional
|
|
Data type of the output.
|
|
If `dtype` is None, the type of the data argument (``data.dtype``)
|
|
is used. If `dtype` is not None and different from ``data.dtype``,
|
|
a copy is performed.
|
|
copy : bool, optional
|
|
Whether to copy the input data (True), or to use a reference instead.
|
|
Default is False.
|
|
subok : bool, optional
|
|
Whether to return a subclass of `MaskedArray` if possible (True) or a
|
|
plain `MaskedArray`. Default is True.
|
|
ndmin : int, optional
|
|
Minimum number of dimensions. Default is 0.
|
|
fill_value : scalar, optional
|
|
Value used to fill in the masked values when necessary.
|
|
If None, a default based on the data-type is used.
|
|
"""
|
|
|
|
def __init__(self, data, mask=nomask, dtype=None, copy=False,
|
|
subok=True, ndmin=0, fill_value=None):
|
|
if isinstance(data, NDArray):
|
|
data = data._array
|
|
super(MaskedArray, self).__init__(data)
|
|
self._data = NDArray(self._array)
|
|
self._baseclass = getattr(data, '_baseclass', type(self._data))
|
|
if mask is nomask:
|
|
self._mask = mask
|
|
else:
|
|
self._mask = np.array(mask)
|
|
if self._mask.shape != self._data.shape:
|
|
self._mask = self._mask.reshape(self._data.shape)
|
|
if self._mask.dtype != np.dtype.bool:
|
|
self._mask = self._mask.astype(np.dtype.bool)
|
|
self._fill_value = fill_value
|
|
|
|
def __str__(self):
|
|
r = 'masked_array(data=' + ArrayUtil.convertToString(self._data._array) + ',\n\tmask='
|
|
if self._mask is nomask:
|
|
r = r + 'False,'
|
|
else:
|
|
r = r + ArrayUtil.convertToString(self._mask._array) + ','
|
|
r = r + '\n\tfill_value=' + str(self._fill_value) + ')'
|
|
return r
|
|
|
|
def __repr__(self):
|
|
return self.__str__()
|
|
|
|
def __setmask__(self, mask, copy=False):
|
|
"""
|
|
Set the mask.
|
|
|
|
"""
|
|
idtype = self.dtype
|
|
current_mask = self._mask
|
|
if mask is masked:
|
|
mask = True
|
|
|
|
if current_mask is nomask:
|
|
# Make sure the mask is set
|
|
# Just don't do anything if there's nothing to do.
|
|
if mask is nomask:
|
|
return
|
|
current_mask = self._mask = make_mask_none(self.shape, idtype)
|
|
|
|
if idtype.names is None:
|
|
# No named fields.
|
|
# Hardmask: don't unmask the data
|
|
if self._hardmask:
|
|
current_mask |= mask
|
|
# Softmask: set everything to False
|
|
# If it's obviously a compatible scalar, use a quick update
|
|
# method.
|
|
elif isinstance(mask, (int, float, np.bool, np.number)):
|
|
current_mask[...] = mask
|
|
# Otherwise fall back to the slower, general purpose way.
|
|
else:
|
|
current_mask.flat = mask
|
|
else:
|
|
# Named fields w/
|
|
mdtype = current_mask.dtype
|
|
mask = np.asarray(mask)
|
|
# Mask is a singleton
|
|
if not mask.ndim:
|
|
# It's a boolean : make a record
|
|
if mask.dtype.kind == 'b':
|
|
mask = np.array(tuple([mask.item()] * len(mdtype)),
|
|
dtype=mdtype)
|
|
# It's a record: make sure the dtype is correct
|
|
else:
|
|
mask = mask.astype(mdtype)
|
|
# Mask is a sequence
|
|
else:
|
|
# Make sure the new mask is a ndarray with the proper dtype
|
|
try:
|
|
copy = None if not copy else True
|
|
mask = np.array(mask, copy=copy, dtype=mdtype)
|
|
# Or assume it's a sequence of bool/int
|
|
except TypeError:
|
|
mask = np.array([tuple([m] * len(mdtype)) for m in mask],
|
|
dtype=mdtype)
|
|
# Hardmask: don't unmask the data
|
|
if self._hardmask:
|
|
for n in idtype.names:
|
|
current_mask[n] |= mask[n]
|
|
# Softmask: set everything to False
|
|
# If it's obviously a compatible scalar, use a quick update
|
|
# method.
|
|
elif isinstance(mask, (int, float, np.dtyp.bool, np.dtype.number)):
|
|
current_mask[...] = mask
|
|
# Otherwise fall back to the slower, general purpose way.
|
|
else:
|
|
current_mask.flat = mask
|
|
# Reshape if needed
|
|
if current_mask.shape:
|
|
current_mask.shape = self.shape
|
|
return
|
|
|
|
_set_mask = __setmask__
|
|
|
|
@property
|
|
def mask(self):
|
|
""" Current mask. """
|
|
|
|
# We could try to force a reshape, but that wouldn't work in some
|
|
# cases.
|
|
# Return a view so that the dtype and shape cannot be changed in place
|
|
# This still preserves nomask by identity
|
|
return self._mask.view()
|
|
|
|
@mask.setter
|
|
def mask(self, value):
|
|
self.__setmask__(value)
|
|
|
|
def filled(self, fill_value=None):
|
|
"""
|
|
Return a copy of self, with masked values filled with a given value.
|
|
**However**, if there are no masked values to fill, self will be
|
|
returned instead as an ndarray.
|
|
Parameters
|
|
----------
|
|
fill_value : array_like, optional
|
|
The value to use for invalid entries. Can be scalar or non-scalar.
|
|
If non-scalar, the resulting ndarray must be broadcastable over
|
|
input array. Default is None, in which case, the `fill_value`
|
|
attribute of the array is used instead.
|
|
Returns
|
|
-------
|
|
filled_array : ndarray
|
|
A copy of ``self`` with invalid entries replaced by *fill_value*
|
|
(be it the function argument or the attribute of ``self``), or
|
|
``self`` itself as an ndarray if there are no invalid entries to
|
|
be replaced.
|
|
Notes
|
|
-----
|
|
The result is **not** a MaskedArray!
|
|
Examples
|
|
--------
|
|
>>> x = np.ma.array([1,2,3,4,5], mask=[0,0,1,0,1], fill_value=-999)
|
|
>>> x.filled()
|
|
array([ 1, 2, -999, 4, -999])
|
|
>>> x.filled(fill_value=1000)
|
|
array([ 1, 2, 1000, 4, 1000])
|
|
>>> type(x.filled())
|
|
<class 'numpy.ndarray'>
|
|
Subclassing is preserved. This means that if, e.g., the data part of
|
|
the masked array is a recarray, `filled` returns a recarray:
|
|
>>> x = np.array([(-1, 2), (-3, 4)], dtype='i8,i8').view(np.recarray)
|
|
>>> m = np.ma.array(x, mask=[(True, False), (False, True)])
|
|
>>> m.filled()
|
|
rec.array([(999999, 2), ( -3, 999999)],
|
|
dtype=[('f0', '<i8'), ('f1', '<i8')])
|
|
"""
|
|
m = self._mask
|
|
if m is nomask:
|
|
return self._data
|
|
|
|
if fill_value is None:
|
|
fill_value = self._fill_value
|
|
|
|
result = self._data.copy()
|
|
result[m] = fill_value
|
|
return result
|
|
|
|
def sum(self, axis=None):
|
|
"""
|
|
Sum of array elements over a given axis.
|
|
|
|
:param axis: (*int*) Axis along which the standard deviation is computed.
|
|
The default is to compute the standard deviation of the flattened array.
|
|
|
|
returns: (*array_like*) Sum result.
|
|
"""
|
|
if self._mask is nomask:
|
|
return self._data.sum(axis=axis)
|
|
else:
|
|
r = self.filled(0)
|
|
return r.sum(axis=axis)
|
|
|
|
def count(self, axis=None):
|
|
"""
|
|
Count of valid array elements over a given axis.
|
|
|
|
:param axis: (*int*) Axis along which the standard deviation is computed.
|
|
The default is to compute the standard deviation of the flattened array.
|
|
:return: (*array_like*) Count result.
|
|
"""
|
|
if self._mask is nomask:
|
|
mask = np.ones(self._data.shape)
|
|
return mask.sum(axis=axis)
|
|
else:
|
|
return (~self._mask).sum(axis=axis)
|
|
|
|
def mean(self, axis=None):
|
|
"""
|
|
Compute tha arithmetic mean along the specified axis.
|
|
|
|
:param axis: (*int*) Axis along which the value is computed.
|
|
The default is to compute the value of the flattened array.
|
|
|
|
returns: (*array_like*) Mean result
|
|
"""
|
|
if self._mask is nomask:
|
|
return self._data.mean(axis=axis)
|
|
else:
|
|
dsum = self.sum(axis=axis)
|
|
cnt = self.count(axis=axis)
|
|
return dsum * 1. / cnt
|
|
|
|
|
|
masked_array = MaskedArray
|
|
|
|
|
|
def getdata(a, subok=True):
|
|
"""
|
|
Return the data of a masked array as an ndarray.
|
|
Return the data of `a` (if any) as an ndarray if `a` is a ``MaskedArray``,
|
|
else return `a` as a ndarray or subclass (depending on `subok`) if not.
|
|
Parameters
|
|
----------
|
|
a : array_like
|
|
Input ``MaskedArray``, alternatively a ndarray or a subclass thereof.
|
|
subok : bool
|
|
Whether to force the output to be a `pure` ndarray (False) or to
|
|
return a subclass of ndarray if appropriate (True, default).
|
|
See Also
|
|
--------
|
|
getmask : Return the mask of a masked array, or nomask.
|
|
getmaskarray : Return the mask of a masked array, or full array of False.
|
|
Examples
|
|
--------
|
|
>>> import mipylib.numeric.ma as ma
|
|
>>> a = ma.masked_equal([[1,2],[3,4]], 2)
|
|
>>> a
|
|
masked_array(
|
|
data=[[1, --],
|
|
[3, 4]],
|
|
mask=[[False, True],
|
|
[False, False]],
|
|
fill_value=2)
|
|
>>> ma.getdata(a)
|
|
array([[1, 2],
|
|
[3, 4]])
|
|
Equivalently use the ``MaskedArray`` `data` attribute.
|
|
>>> a.data
|
|
array([[1, 2],
|
|
[3, 4]])
|
|
"""
|
|
try:
|
|
data = a._data
|
|
except AttributeError:
|
|
data = np.array(a, copy=False, subok=subok)
|
|
|
|
return data
|
|
|
|
|
|
def getmask(a):
|
|
"""
|
|
Return the mask of a masked array, or nomask.
|
|
|
|
Return the mask of `a` as an ndarray if `a` is a `MaskedArray` and the
|
|
mask is not `nomask`, else return `nomask`. To guarantee a full array
|
|
of booleans of the same shape as a, use `getmaskarray`.
|
|
|
|
Parameters
|
|
----------
|
|
a : array_like
|
|
Input `MaskedArray` for which the mask is required.
|
|
|
|
See Also
|
|
--------
|
|
getdata : Return the data of a masked array as an ndarray.
|
|
getmaskarray : Return the mask of a masked array, or full array of False.
|
|
|
|
Examples
|
|
--------
|
|
>>> a = np.ma.masked_equal([[1,2],[3,4]], 2)
|
|
>>> a
|
|
masked_array(
|
|
data=[[1, --],
|
|
[3, 4]],
|
|
mask=[[False, True],
|
|
[False, False]],
|
|
fill_value=2)
|
|
>>> ma.getmask(a)
|
|
array([[False, True],
|
|
[False, False]])
|
|
|
|
Equivalently use the `MaskedArray` `mask` attribute.
|
|
|
|
>>> a.mask
|
|
array([[False, True],
|
|
[False, False]])
|
|
|
|
Result when mask == `nomask`
|
|
|
|
>>> b = np.ma.masked_array([[1,2],[3,4]])
|
|
>>> b
|
|
masked_array(
|
|
data=[[1, 2],
|
|
[3, 4]],
|
|
mask=False,
|
|
fill_value=999999)
|
|
>>> np.ma.nomask
|
|
False
|
|
>>> np.ma.getmask(b) == np.ma.nomask
|
|
True
|
|
>>> b.mask == np.ma.nomask
|
|
True
|
|
|
|
"""
|
|
return getattr(a, '_mask', nomask)
|
|
|
|
|
|
get_mask = getmask
|
|
|
|
|
|
def masked_invalid(a, copy=True):
|
|
"""
|
|
Mask an array where invalid values occur (NaNs or infs).
|
|
This function is a shortcut to ``masked_where``, with
|
|
`condition` = ~(np.isfinite(a)). Any pre-existing mask is conserved.
|
|
Only applies to arrays with a dtype where NaNs or infs make sense
|
|
(i.e. floating point types), but accepts any array_like object.
|
|
See Also
|
|
--------
|
|
masked_where : Mask where a condition is met.
|
|
Examples
|
|
--------
|
|
>>> import mipylib.numeric.ma as ma
|
|
>>> a = np.arange(5, dtype='float')
|
|
>>> a[2] = np.nan
|
|
>>> a[3] = np.inf
|
|
>>> a
|
|
array([ 0., 1., nan, inf, 4.])
|
|
>>> ma.masked_invalid(a)
|
|
masked_array(data=[0.0, 1.0, --, --, 4.0],
|
|
mask=[False, False, True, True, False],
|
|
fill_value=1e+20)
|
|
"""
|
|
a = np.array(a, copy=copy, subok=True)
|
|
mask = getattr(a, '_mask', None)
|
|
if mask is not None:
|
|
condition = ~(np.isfinite(getdata(a)))
|
|
if mask is not nomask:
|
|
condition |= mask
|
|
else:
|
|
condition = ~(np.isfinite(a))
|
|
if isinstance(a, MaskedArray):
|
|
result = a
|
|
else:
|
|
result = MaskedArray(a)
|
|
result._mask = condition
|
|
return result
|
|
|
|
|
|
def is_masked(x):
|
|
"""
|
|
Determine whether input has masked values.
|
|
|
|
Accepts any object as input, but always returns False unless the
|
|
input is a MaskedArray containing masked values.
|
|
|
|
Parameters
|
|
----------
|
|
x : array_like
|
|
Array to check for masked values.
|
|
|
|
Returns
|
|
-------
|
|
result : bool
|
|
True if `x` is a MaskedArray with masked values, False otherwise.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> import numpy.ma as ma
|
|
>>> x = ma.masked_equal([0, 1, 0, 2, 3], 0)
|
|
>>> x
|
|
masked_array(data=[--, 1, --, 2, 3],
|
|
mask=[ True, False, True, False, False],
|
|
fill_value=0)
|
|
>>> ma.is_masked(x)
|
|
True
|
|
>>> x = ma.masked_equal([0, 1, 0, 2, 3], 42)
|
|
>>> x
|
|
masked_array(data=[0, 1, 0, 2, 3],
|
|
mask=False,
|
|
fill_value=42)
|
|
>>> ma.is_masked(x)
|
|
False
|
|
|
|
Always returns False if `x` isn't a MaskedArray.
|
|
|
|
>>> x = [False, True, False]
|
|
>>> ma.is_masked(x)
|
|
False
|
|
>>> x = 'a string'
|
|
>>> ma.is_masked(x)
|
|
False
|
|
|
|
"""
|
|
m = getmask(x)
|
|
if m is nomask:
|
|
return False
|
|
elif m.any():
|
|
return True
|
|
return False
|