2024-05-20 15:30:45 +08:00

446 lines
14 KiB
Python

#-----------------------------------------------------
# Author: Yaqiang Wang
# Date: 2017-3-7
# Purpose: MeteoInfo Series module
# Note: Jython
#-----------------------------------------------------
import datetime
import mipylib.miutil as miutil
import mipylib.numeric as np
from java.lang import Double
from java.util import Date
from org.meteoinfo.dataframe import Series as MISeries
from org.meteoinfo.ndarray import Range
import groupby
from index import Index
from indexing import LocIndexer, ILocIndexer
# Module-level alias for the Java ``Double.NaN`` constant.
nan = Double.NaN
class Series(object):
def __init__(self, data=None, index=None, name=None, series=None):
    """
    One-dimensional array with axis labels (including time series).

    :param data: (*array_like*) One-dimensional array data. Lists and tuples are
        converted with ``np.array``.
    :param index: (*list*) Data index list. Values must be unique and hashable, same
        length as data. When omitted, the positions ``0 .. len(data) - 1`` are used.
    :param name: (*string*) Series name.
    :param series: (*MISeries*) Existing Java series to wrap. When given, ``data``,
        ``index`` and ``name`` are not used.
    :raises ValueError: If ``index`` is given and its length differs from ``data``.
    """
    if series is not None:
        # Wrap an existing Java series: data and index are read back from it.
        self._series = series
        self._data = np.array(self._series.getData())
        self._index = Index.factory(index=self._series.getIndex())
        return

    if isinstance(data, (list, tuple)):
        data = np.array(data)

    if index is None:
        index = range(0, len(data))
    elif len(data) != len(index):
        raise ValueError('Wrong length of index!')

    if isinstance(index, np.NDArray):
        index = index.tolist()

    # An Index instance is used as is; anything else goes through the factory.
    self._index = index if isinstance(index, Index) else Index.factory(index)
    self._data = data
    self._series = MISeries(data._array, self._index._index, name)
def __and__(self, other):
    """
    Apply ``&`` between the values of this series and another operand.

    :param other: (*Series or array_like*) Right operand; a Series contributes its values.
    :returns: (*Series*) New series that keeps the index of this series.
    """
    rhs = other.values if isinstance(other, Series) else other
    result = self.values.__and__(rhs)
    return Series(result, index=self._index)
def __or__(self, other):
    """
    Apply ``|`` between the values of this series and another operand.

    :param other: (*Series or array_like*) Right operand; a Series contributes its values.
    :returns: (*Series*) New series that keeps the index of this series.
    """
    rhs = other.values if isinstance(other, Series) else other
    result = self.values.__or__(rhs)
    return Series(result, index=self._index)
def __xor__(self, other):
    """
    Apply ``^`` between the values of this series and another operand.

    :param other: (*Series or array_like*) Right operand; a Series contributes its values.
    :returns: (*Series*) New series that keeps the index of this series.
    """
    rhs = other.values if isinstance(other, Series) else other
    result = self.values.__xor__(rhs)
    return Series(result, index=self._index)
#---- index property
def get_index(self):
    """
    Get the index of the series.

    :returns: The index object held by this series.
    """
    return self._index

def set_index(self, value):
    """
    Replace the index of the series.

    :param value: New index values; they are wrapped in an ``Index`` and the
        wrapped Java series is updated with the new index data.
    """
    new_index = Index(value)
    self._index = new_index
    self._series.setIndex(new_index.data)

index = property(get_index, set_index)
#---- values property
def get_values(self):
    """
    Get the data values of the series.

    :returns: The underlying data array, or the data converted with
        ``miutil.pydate`` when the first element is a Java ``Date``.
    """
    # Only the first element is inspected to detect date data. The length check
    # keeps an empty series from failing on ``self._data[0]``.
    if len(self._data) > 0 and isinstance(self._data[0], Date):
        return miutil.pydate(self._data.aslist())
    return self._data

def set_values(self, value):
    """
    Replace the data values of the series.

    :param value: (*array_like*) New data; converted with ``np.array`` and handed
        to the wrapped Java series.
    """
    self._data = np.array(value)
    self._series.setData(self._data._array)

values = property(get_values, set_values)
#---- name property
def get_name(self):
    """
    Get the series name.

    :returns: Name reported by the wrapped Java series.
    """
    series_name = self._series.getName()
    return series_name

def set_name(self, value):
    """
    Set the series name.

    :param value: (*string*) New name, stored on the wrapped Java series.
    """
    self._series.setName(value)

name = property(get_name, set_name)
#---- dtype property
def get_dtype(self):
    """
    Get the data type of the series data.

    :returns: The ``dtype`` of the backing data array.
    """
    # Read from the backing array rather than ``self.values``: for date data
    # ``values`` is the ``miutil.pydate`` conversion, which is not the array and
    # presumably has no ``dtype`` attribute.
    return self._data.dtype

dtype = property(get_dtype)
@property
def loc(self):
    """
    Access a group of rows and columns by label(s) or a boolean array.

    :returns: (*LocIndexer*) Indexer bound to this series.
    """
    indexer = LocIndexer(self)
    return indexer
@property
def iloc(self):
    """
    Purely integer-location based indexing for selection by position.

    :returns: (*ILocIndexer*) Indexer bound to this series.
    """
    indexer = ILocIndexer(self)
    return indexer
def __getitem__(self, key):
    """
    Select data with ``series[key]``.

    A plain ``int`` key is treated as a position and handled by
    ``_getitem_iloc``. Every other key (label, datetime, list/tuple/array,
    ``Index`` or slice) is handled by ``_getitem_loc``.

    :param key: Position, label, sequence of labels, ``Index`` or slice.
    :returns: A single value or a new Series, depending on the key.
    :raises KeyError: If an ``int`` key is rejected by ``_getitem_iloc``.
    """
    # The non-int branches used to be a line-for-line copy of ``_getitem_loc``.
    # Delegating keeps the two code paths from drifting apart.
    if isinstance(key, int):
        return self._getitem_iloc(key)
    return self._getitem_loc(key)
def __setitem__(self, key, value):
    """
    Assign ``value`` to the entries selected by ``key``.

    :param key: A Series (passed to the wrapped Java series as the selector), or
        a label / sequence of labels / positional key resolved through ``__getkey``.
    :param value: Value to assign.
    """
    if isinstance(key, Series):
        self._series.setValue(key._series, value)
        return None
    ikey = self.__getkey(key)
    # Write into the backing array. ``self.values`` returns a converted copy
    # for date data, so assigning through it never reached the series.
    self._data.__setitem__(ikey, value)
def _slice_to_range(self, key):
    """
    Translate a Python slice into a ``Range`` of row positions.

    The code treats the ``Range`` end as inclusive, which is why a numeric
    ``stop`` is reduced by one. String bounds are looked up in the index.

    :param key: (*slice*) Slice with numeric or string bounds.
    :returns: (*Range*) Range built from the resolved start, end and step.
    """
    size = self.__len__()

    start = key.start
    if isinstance(start, basestring):
        sidx = self._index.index(start)
        if sidx < 0:
            # Negative lookup result (presumably "label not found"): start at the first row.
            sidx = 0
    else:
        sidx = 0 if start is None else start
        if sidx < 0:
            sidx = size + sidx

    stop = key.stop
    if isinstance(stop, basestring):
        eidx = self._index.index(stop)
        if eidx < 0:
            # Negative lookup result: run to the last row. The end is inclusive,
            # so this must be ``size - 1`` (as in the ``stop is None`` case), not ``size``.
            eidx = size - 1
    else:
        eidx = size - 1 if stop is None else stop - 1
        # NOTE(review): a numeric stop of 0 gives eidx == -1 here, which the
        # wrap-around below turns into the last row - confirm the intended result.
        if eidx < 0:
            eidx = size + eidx

    step = 1 if key.step is None else key.step
    return Range(sidx, eidx, step)

def _getitem_loc(self, key):
    """
    Label-based selection.

    :param key: Label, datetime, sequence of labels, ``Index`` or slice.
    :returns: A new Series for sequence and slice keys; for a single label,
        whatever the Java lookup returns (wrapped in a Series when it is one).
    """
    if isinstance(key, Index):
        key = key.data
    elif isinstance(key, datetime.datetime):
        key = miutil.jdatetime(key)

    if isinstance(key, (list, tuple, np.NDArray)):
        if isinstance(key, np.NDArray):
            key = key.aslist()
        # Guard the peek at key[0] so an empty key sequence does not raise IndexError.
        if len(key) > 0 and isinstance(key[0], datetime.datetime):
            key = miutil.jdatetime(key)
        return Series(series=self._series.getValueByIndex(key))

    if isinstance(key, slice):
        return Series(series=self._series.getValues(self._slice_to_range(key)))

    r = self._series.getValueByIndex(key)
    if isinstance(r, MISeries):
        return Series(series=r)
    return r

def _getitem_iloc(self, key):
    """
    Position-based selection.

    :param key: Integer position, sequence of positions, ``Index`` or slice.
    :returns: A single value for an ``int`` key, otherwise a new Series (or the
        raw Java result when it is not a series).
    :raises KeyError: If an ``int`` key is negative or not smaller than the length.
    """
    if isinstance(key, Index):
        key = key.data

    if isinstance(key, int):
        if key < 0 or key >= self.__len__():
            raise KeyError(key)
        return self._series.getValue(key)

    if isinstance(key, (list, tuple, np.NDArray)):
        if isinstance(key, np.NDArray):
            key = key.aslist()
        return Series(series=self._series.getValues(key))

    if isinstance(key, slice):
        return Series(series=self._series.getValues(self._slice_to_range(key)))

    r = self._series.getValues(key)
    if isinstance(r, MISeries):
        return Series(series=r)
    return r
def __getkey(self, key):
    """
    Translate label keys into integer positions for item assignment.

    :param key: A string label, a sequence whose first element is a string, or
        any other key (returned unchanged).
    :returns: A single position, a list of positions, or ``key`` itself.
    :raises KeyError: If no position is found for the given label(s).
    """
    if isinstance(key, basestring):
        ikey = self.index.get_loc(key)
        if len(ikey) == 0:
            raise KeyError(key)
        return ikey[0] if len(ikey) == 1 else list(ikey)

    # The length check keeps an empty sequence from failing on key[0];
    # such a key is passed through unchanged.
    if isinstance(key, (list, tuple, np.NDArray)) and len(key) > 0 \
            and isinstance(key[0], basestring):
        labels = key.asarray() if isinstance(key, np.NDArray) else key
        ikey = self.index.get_indices(labels)
        if len(ikey) == 0:
            # Report the offending key rather than raising an empty KeyError.
            raise KeyError(key)
        return list(ikey)

    return key
def __iter__(self):
"""
provide iteration over the values of the Series
"""
#return iter(self.values)
#return zip(iter(self.index), iter(self.values))
return iter(self.index)
def iteritems(self):
    """
    Iterate over (index, value) tuples.

    :returns: The result of zipping the index with the values.
    """
    # ``iter(self)`` walks the index (see ``__iter__``), so zipping the index
    # with it produced (index, index) pairs. Pair the index with the values.
    return zip(iter(self.index), iter(self.values))
def __len__(self):
return self.values.__len__()
def __str__(self):
    """
    String form of the series; same text as ``__repr__``.
    """
    text = self.__repr__()
    return text
def __repr__(self):
    """
    Representation of the series, as produced by the wrapped Java series' ``toString``.
    """
    text = self._series.toString()
    return text
def __eq__(self, other):
    """
    ``==`` comparison, delegated to the Java series' ``equal``.

    :param other: Operand to compare with.
    :returns: (*Series*) Series wrapping the Java comparison result.
    """
    return Series(series=self._series.equal(other))
def __lt__(self, other):
    """
    ``<`` comparison, delegated to the Java series' ``lessThan``.

    :param other: Operand to compare with.
    :returns: (*Series*) Series wrapping the Java comparison result.
    """
    return Series(series=self._series.lessThan(other))
def __le__(self, other):
    """
    ``<=`` comparison, delegated to the Java series' ``lessThanOrEqual``.

    :param other: Operand to compare with.
    :returns: (*Series*) Series wrapping the Java comparison result.
    """
    return Series(series=self._series.lessThanOrEqual(other))
def __gt__(self, other):
    """
    ``>`` comparison, delegated to the Java series' ``greaterThan``.

    :param other: Operand to compare with.
    :returns: (*Series*) Series wrapping the Java comparison result.
    """
    return Series(series=self._series.greaterThan(other))
def __ge__(self, other):
    """
    ``>=`` comparison, delegated to the Java series' ``greaterThanOrEqual``.

    :param other: Operand to compare with.
    :returns: (*Series*) Series wrapping the Java comparison result.
    """
    return Series(series=self._series.greaterThanOrEqual(other))
def head(self, n=5):
    """
    Print the top rows.

    :param n: (*int*) row number.
    :returns: None; the result of the Java series' ``head`` is printed.
    """
    top_rows = self._series.head(n)
    print(top_rows)
def tail(self, n=5):
    """
    Print the bottom rows.

    :param n: (*int*) row number.
    :returns: None; the result of the Java series' ``tail`` is printed.
    """
    bottom_rows = self._series.tail(n)
    print(bottom_rows)
def asarray(self):
    """
    Return MI Java Array object

    :returns: MI Java Array object
    """
    # Call ``asarray`` on the backing array. For date data ``self.values`` is the
    # ``miutil.pydate`` conversion rather than the array, so it presumably has
    # no ``asarray`` method.
    return self._data.asarray()
def replace(self, to_replace, value):
    """
    Replace values given in to_replace with value.

    :param to_replace: (*object*) The value to be replaced.
    :param value: (*object*) The replacing value.
    :return: (*Series*) New series wrapping the result of the Java ``replace`` call.
    """
    return Series(series=self._series.replace(to_replace, value))
def mean(self):
    """
    Return the mean of the values.

    :returns: Mean value; a Java series result is wrapped in a Series.
    """
    result = self._series.mean()
    return Series(series=result) if isinstance(result, MISeries) else result
def max(self):
    """
    Return the maximum of the values.

    :returns: Maximum value; a Java series result is wrapped in a Series.
    """
    result = self._series.max()
    return Series(series=result) if isinstance(result, MISeries) else result
def min(self):
    """
    Return the minimum of the values.

    :returns: Minimum value; a Java series result is wrapped in a Series.
    """
    result = self._series.min()
    return Series(series=result) if isinstance(result, MISeries) else result
def std(self):
    """
    Return the standard deviation of the values.

    :returns: Standard deviation value (from the Java ``stdDev`` call); a Java
        series result is wrapped in a Series.
    """
    result = self._series.stdDev()
    return Series(series=result) if isinstance(result, MISeries) else result
def groupby(self, by=None):
    """
    Group Series.

    :param by: Used to determine the groups for the groupby.
    :returns: GroupBy object wrapping the result of the Java ``groupBy`` call.
    """
    # Inside the method body ``groupby`` resolves to the imported module,
    # not to this method.
    grouped = self._series.groupBy(by)
    return groupby.GroupBy(grouped)
def resample(self, by):
    """
    Group series by date time index.

    :param by: Used to determine the groups for the groupby.
    :returns: GroupBy object wrapping the result of the Java ``resample`` call.
    """
    grouped = self._series.resample(by)
    return groupby.GroupBy(grouped)
def to_csv(self, filepath, delimiter=',', date_format=None,
           float_format=None, index=True):
    """
    Save the data to a csv file.

    :param filepath: (*string*) The file name.
    :param delimiter: (*string*) Field delimiter character. Default is ``,``.
    :param date_format: (*string*) Date format string. i.e. 'yyyyMMddHH'.
    :param float_format: (*string*) Float format string. i.e. '%.2f'.
    :param index: (*boolean*) Write index or not.
    """
    # All arguments are forwarded unchanged to the Java series' ``saveCSV``.
    self._series.saveCSV(
        filepath,
        delimiter,
        date_format,
        float_format,
        index,
    )
#################################################################