2021-10-14 14:33:07 +08:00

744 lines
26 KiB
Python

#-----------------------------------------------------
# Author: Yaqiang Wang
# Date: 2014-12-27
# Purpose: MeteoInfo Dataset module
# Note: Jython
#-----------------------------------------------------
from org.meteoinfo.ndarray import Dimension, DimensionType, Range, Array, MAMath
from org.meteoinfo.ndarray.math import ArrayMath, ArrayUtil
from org.meteoinfo.common import PointD
from org.meteoinfo.projection import KnownCoordinateSystems, Reproject
from org.meteoinfo.data.meteodata import Attribute
from ucar.nc2 import Attribute as NCAttribute
from ucar.ma2 import DataType as NCDataType
from ucar.ma2 import ArrayStructure, StructureMembers
from org.meteoinfo.data.meteodata.netcdf import NCUtil
from org.meteoinfo.ndarray import DataType
import mipylib.numeric as np
import mipylib.miutil as miutil
import datetime
import numbers
# Dimension variable
class DimVariable(object):
# variable must be org.meteoinfo.data.meteodata.Variable
# dataset is DimDataFile
def __init__(self, variable=None, dataset=None, ncvariable=None):
self.variable = variable
self.dataset = dataset
self.ncvariable = ncvariable
if not variable is None:
self.name = variable.getName()
self.dtype = np.dtype.fromjava(variable.getDataType())
self.dims = variable.getDimensions()
self.ndim = variable.getDimNumber()
self.attributes = variable.getAttributes()
self.fill_value = variable.getFillValue()
self.scale_factor = variable.getScaleFactor()
self.add_offset = variable.getAddOffset()
elif not ncvariable is None:
self.name = ncvariable.getShortName()
self.dtype = ncvariable.getDataType()
self.dims = ncvariable.getDimensions()
self.ndim = len(self.dims)
self.attributes = list(ncvariable.getAttributes())
else:
self.name = None
self.dtype = None
self.dims = None
self.ndim = 0
self.attributes = None
self.proj = None if dataset is None else dataset.proj
def __len__(self):
len = 1
if not self.variable is None:
for dim in self.variable.getDimensions():
len = len * dim.getLength()
return len
def __str__(self):
r = str(self.dtype) + ' ' + 'None' if self.name is None else self.name + '('
if self.dims is None:
r = r + '):'
else:
for dim in self.dims:
dimname = dim.getShortName()
if dimname is None:
dimname = 'null'
r = r + dimname + ','
r = r[:-1] + '):'
if not self.attributes is None:
for attr in self.attributes:
r = r + '\n\t' + self.name + ': ' + attr.toString()
return r
def __repr__(self):
return self.__str__()
def __getitem__(self, indices):
if self.variable.getDataType() in [DataType.STRUCTURE, DataType.SEQUENCE]:
if isinstance(indices, str): #metadata
return self.member_array(indices)
else:
a = self.dataset.read(self.name)
return StructureArray(a.getArrayObject())
if indices is None:
inds = []
for _ in range(self.ndim):
inds.append(slice(None))
indices = tuple(inds)
if not isinstance(indices, tuple):
indices = [indices]
if len(indices) < self.ndim:
indices = list(indices)
for _ in range(self.ndim - len(indices)):
indices.append(slice(None))
indices = tuple(indices)
if len(indices) != self.ndim:
print 'indices must be ' + str(self.ndim) + ' dimensions!'
return None
if not self.proj is None and not self.proj.isLonLat():
xlim = None
ylim = None
xidx = -1
yidx = -1
for i in range(0, self.ndim):
dim = self.dims[i]
if dim.getDimType() == DimensionType.X:
k = indices[i]
if isinstance(k, basestring):
xlims = k.split(':')
if len(xlims) == 1:
xlim = [float(xlims[0])]
else:
xlim = [float(xlims[0]), float(xlims[1])]
xidx = i
elif dim.getDimType() == DimensionType.Y:
k = indices[i]
if isinstance(k, basestring):
ylims = k.split(':')
if len(ylims) == 1:
ylim = [float(ylims[0])]
else:
ylim = [float(ylims[0]), float(ylims[1])]
yidx = i
if not xlim is None and not ylim is None:
fromproj=KnownCoordinateSystems.geographic.world.WGS1984
inpt = PointD(xlim[0], ylim[0])
outpt1 = Reproject.reprojectPoint(inpt, fromproj, self.proj)
if len(xlim) == 1:
xlim = [outpt1.X]
ylim = [outpt1.Y]
else:
inpt = PointD(xlim[1], ylim[1])
outpt2 = Reproject.reprojectPoint(inpt, fromproj, self.proj)
xlim = [outpt1.X, outpt2.X]
ylim = [outpt1.Y, outpt2.Y]
indices1 = []
for i in range(0, self.ndim):
if i == xidx:
if len(xlim) == 1:
indices1.append(str(xlim[0]))
else:
indices1.append(str(xlim[0]) + ':' + str(xlim[1]))
elif i == yidx:
if len(ylim) == 1:
indices1.append(str(ylim[0]))
else:
indices1.append(str(ylim[0]) + ':' + str(ylim[1]))
else:
indices1.append(indices[i])
indices = indices1
origin = []
size = []
stride = []
ranges = []
dims = []
flips = []
onlyrange = True
for i in range(0, self.ndim):
isrange = True
dimlen = self.dimlen(i)
k = indices[i]
sidx = 0
eidx = 0
step = 1
if isinstance(k, int):
if k < 0:
k = self.dims[i].getLength() + k
sidx = k
eidx = k
step = 1
elif isinstance(k, slice):
if isinstance(k.start, basestring):
sv = float(k.start)
sidx = self.dims[i].getValueIndex(sv)
elif isinstance(k.start, datetime.datetime):
sv = miutil.date2num(k.start)
sidx = self.dims[i].getValueIndex(sv)
else:
sidx = 0 if k.start is None else k.start
if sidx < 0:
sidx = self.dimlen(i) + sidx
if isinstance(k.stop, basestring):
ev = float(k.stop)
eidx = self.dims[i].getValueIndex(ev)
elif isinstance(k.stop, datetime.datetime):
ev = miutil.date2num(k.stop)
eidx = self.dims[i].getValueIndex(ev)
else:
eidx = self.dimlen(i) if k.stop is None else k.stop
if eidx < 0:
eidx = self.dimlen(i) + eidx
eidx -= 1
if isinstance(k.step, basestring):
nv = float(k.step) + self.dims[i].getDimValue()[0]
nidx = self.dims[i].getValueIndex(nv)
step = nidx - sidx
elif isinstance(k.step, datetime.timedelta):
nv = miutil.date2num(k.start + k.step)
nidx = self.dims[i].getValueIndex(nv)
step = nidx - sidx
else:
step = 1 if k.step is None else k.step
elif isinstance(k, list):
onlyrange = False
isrange = False
if not isinstance(k[0], datetime.datetime):
ranges.append(k)
else:
tlist = []
for tt in k:
sv = miutil.date2num(tt)
idx = self.dims[i].getValueIndex(sv)
tlist.append(idx)
ranges.append(tlist)
k = tlist
elif isinstance(k, basestring):
dim = self.variable.getDimension(i)
kvalues = k.split(':')
sv = float(kvalues[0])
sidx = dim.getValueIndex(sv)
if len(kvalues) == 1:
eidx = sidx
step = 1
else:
ev = float(kvalues[1])
eidx = dim.getValueIndex(ev)
if len(kvalues) == 2:
step = 1
else:
step = int(float(kvalues[2]) / dim.getDeltaValue())
if sidx > eidx and step > 0:
step = -step
else:
print k
return None
if isrange:
if sidx >= dimlen or eidx >= dimlen:
print('Index out of range!')
return None
origin.append(sidx)
n = abs(eidx - sidx) + 1
size.append(n)
if n > 1:
dim = self.variable.getDimension(i)
#if dim.isReverse():
# step = -step
dim = dim.extract(sidx, eidx, step)
#dim.setReverse(False)
dims.append(dim)
stride.append(step)
if step < 0:
#step = abs(step)
flips.append(i)
if sidx > eidx:
iidx = eidx
eidx = sidx
sidx = iidx
rr = Range(sidx, eidx, abs(step))
ranges.append(rr)
else:
if len(k) > 1:
dim = self.variable.getDimension(i)
dim = dim.extract(k)
#dim.setReverse(False)
dims.append(dim)
if onlyrange:
rr = self.dataset.dataset.read(self.name, ranges)
else:
rr = self.dataset.dataset.take(self.name, ranges)
if rr.getSize() == 1:
iter = rr.getIndexIterator()
return iter.getObjectNext()
else:
for i in flips:
rr = rr.flip(i)
rr = rr.reduce()
ArrayMath.missingToNaN(rr, self.fill_value)
if len(flips) > 0:
rrr = Array.factory(rr.getDataType(), rr.getShape())
MAMath.copy(rrr, rr)
array = np.array(rrr)
else:
array = np.array(rr)
data = np.DimArray(array, dims, self.fill_value, self.dataset.proj)
return data
def read(self):
'''
Read data array.
:return: (*array*) Data array.
'''
return np.array(self.dataset.read(self.name))
def get_members(self):
'''
Get structure members. Only valid for Structure data type.
:return: Structure members.
'''
a = self.read()
if a._array.getDataType() != DataType.STRUCTURE:
print 'This method is only valid for structure array!'
return None
a = a._array.getArrayObject()
return a.getMembers()
def get_member(self, member):
'''
Get structure members. Only valid for Structure data type.
:param member: (*str*) Member name.
:return: Structure members.
'''
a = self.read()
if a._array.getDataType() != DataType.STRUCTURE:
print 'This method is only valid for structure array!'
return None
a = a._array.getArrayObject()
return a.findMember(member)
def member_array(self, member, indices=None):
'''
Extract member array. Only valid for Structure data type.
:param member: (*string*) Member name.
:param indices: (*slice*) Indices.
:returns: (*array*) Extracted member array.
'''
a = self.read()
if a._array.getDataType() != DataType.STRUCTURE:
print 'This method is only valid for structure array!'
return None
a = a._array.getArrayObject()
if isinstance(member, basestring):
member = a.findMember(member)
if member is None:
raise KeyError('The member %s not exists!' % member)
self.dataset.reopen()
a = a.extractMemberArray(member)
if a.getDataType() in [NCDataType.SEQUENCE, NCDataType.STRUCTURE]:
return StructureArray(a)
a = NCUtil.convertArray(a)
r = np.array(a)
if r.size == 1:
return r[0]
if not indices is None:
r = r.__getitem__(indices)
return r
# get dimension length
def dimlen(self, idx):
return self.dims[idx].getLength()
def dimvalue(self, idx, convert=False):
'''
Get dimension values.
:param idx: (*int*) Dimension index.
:param convert: (*boolean*) If convert to real values (i.e. datetime). Default
is ``False``.
:returns: (*array_like*) Dimension values
'''
dim = self.dims[idx]
if convert:
if dim.getDimType() == DimensionType.T:
return miutil.nums2dates(dim.getDimValue())
else:
return np.array(ArrayUtil.array(self.dims[idx].getDimValue()))
else:
return np.array(ArrayUtil.array(self.dims[idx].getDimValue()))
def attrvalue(self, attr):
'''
Get a global attribute value.
:param attr: (*string or Attribute*) Attribute or Attribute name
'''
if isinstance(attr, str):
attr = self.variable.findAttribute(attr)
if attr is None:
return None
v = np.array(attr.getValues())
return v
def xdim(self):
for dim in self.dims:
if dim.getDimType() == DimensionType.X:
return dim
return None
def ydim(self):
for dim in self.dims:
if dim.getDimType() == DimensionType.Y:
return dim
return None
def zdim(self):
for dim in self.dims:
if dim.getDimType() == DimensionType.Z:
return dim
return None
def tdim(self):
for dim in self.dims:
if dim.getDimType() == DimensionType.T:
return dim
return None
def adddim(self, dimtype, dimvalue):
if isinstance(dimvalue, np.NDArray):
dimvalue = dimvalue.aslist()
self.variable.addDimension(dimtype, dimvalue)
self.ndim = self.variable.getDimNumber()
def setdim(self, dimtype, dimvalue, index=None, reverse=False):
if isinstance(dimvalue, np.NDArray):
dimvalue = dimvalue.aslist()
if index is None:
self.variable.setDimension(dimtype, dimvalue, reverse)
else:
self.variable.setDimension(dimtype, dimvalue, reverse, index)
self.ndim = self.variable.getDimNumber()
def setdimrev(self, idx, reverse):
self.dims[idx].setReverse(reverse)
def addattr(self, attrname, attrvalue):
if self.attributes is None:
self.attributes = []
if isinstance(attrvalue, Array):
attrvalue = np.NDArray(attrvalue)
if isinstance(attrvalue, np.NDArray):
if attrvalue.size == 1:
attrvalue = attrvalue[0]
attr = Attribute(attrname, attrvalue)
else:
attr = Attribute(attrname, attrvalue._array)
else:
attr = Attribute(attrname, attrvalue)
self.attributes.append(attr)
if not self.ncvariable is None:
if isinstance(attrvalue, np.NDArray):
attrvalue = NCUtil.convertArray(attrvalue._array)
ncattr = NCAttribute(attrname, attrvalue)
self.ncvariable.addAttribute(ncattr)
return attr
class StructureArray(object):
def __init__(self, array):
"""
Structure array.
:param array: (*ArrayStructure*) NC ArrayStructure object.
"""
self._array = array
@property
def shape(self):
"""
Get array shape.
:return: (*tuple of int*) Array shape.
"""
return tuple(self._array.getShape())
@property
def dtype(self):
"""
Get array data type.
:return: (*DataType*) Array data type.
"""
return NCUtil.convertDataType(self._array.getDataType())
def get_members(self, rec=0):
"""
Get structure members.
:param rec: (*int*) Record index.
:return: (*Member*) Structure members.
"""
if isinstance(self._array, ArrayStructure):
return self._array.getMembers()
else:
return self._array.getObject(rec).getMembers()
def get_member(self, member, rec=0):
"""
Get structure members.
:param member: (*str*) Member name.
:param rec: (*int*) Record index.
:return: (*Member*) Structure members.
"""
if isinstance(self._array, ArrayStructure):
return self._array.findMember(member)
else:
return self._array.getObject(rec).findMember(member)
def member_array(self, member, indices=None, rec=0):
'''
Extract member array. Only valid for Structure data type.
:param member: (*string*) Member name.
:param indices: (*slice*) Indices.
:param rec: (*int*) Record index.
:returns: (*array*) Extracted member array.
'''
is_structure = isinstance(self._array, ArrayStructure)
if isinstance(member, basestring):
if is_structure:
member = self._array.findMember(member)
else:
member = self._array.getObject(rec).findMember(member)
if member is None:
raise KeyError('The member %s not exists!' % member)
if is_structure:
a = self._array.extractMemberArray(member)
else:
a = self._array.getObject(rec).extractMemberArray(member)
if a.getDataType() in [NCDataType.SEQUENCE, NCDataType.STRUCTURE]:
return StructureArray(a)
a = NCUtil.convertArray(a)
r = np.array(a)
if r.size == 1:
return r[0]
if not indices is None:
r = r.__getitem__(indices)
return r
# Variable in multiple data files (DimDataFiles) - only time dimension is different.
class TDimVariable(object):
# variable must be org.meteoinfo.data.meteodata.Variable
# dataset is DimDataFiles
def __init__(self, variable, dataset):
self.variable = variable
self.dataset = dataset
self.name = variable.getName()
self.dtype = np.dtype.fromjava(variable.getDataType())
self.ndim = variable.getDimNumber()
self.fill_value = variable.getFillValue()
self.scale_factor = variable.getScaleFactor()
self.add_offset = variable.getAddOffset()
dims = variable.getDimensions()
tdim = Dimension(DimensionType.T)
times = []
for t in self.dataset.times:
times.append(miutil.date2num(t))
tdim.setDimValues(times)
dims[0] = tdim
self.dims = dims
self.tnum = len(times)
def __str__(self):
if self.variable is None:
return 'None'
r = str(self.dtype) + ' ' + self.name + '('
for dim in self.dims:
dimname = dim.getShortName()
if dimname is None:
dimname = 'null'
r = r + dimname + ','
r = r[:-1] + '):'
attrs = self.variable.getAttributes()
for attr in attrs:
r = r + '\n\t' + self.name + ': ' + attr.toString()
return r
def __repr__(self):
return self.__str__()
def __getitem__(self, indices):
if not isinstance(indices, tuple):
indices = [indices]
if len(indices) < self.ndim:
indices = list(indices)
for _ in range(self.ndim - len(indices)):
indices.append(slice(None))
indices = tuple(indices)
if len(indices) != self.ndim:
print 'indices must be ' + str(self.ndim) + ' dimensions!'
return None
k = indices[0]
t_list = None
if isinstance(k, int):
if k < 0:
k = self.tnum + k
sidx = k
eidx = k
step = 1
elif isinstance(k, datetime.datetime):
sidx = self.dataset.timeindex(k)
eidx = sidx
step = 1
elif isinstance(k, slice):
sidx = 0 if k.start is None else k.start
if isinstance(sidx, datetime.datetime):
sidx = self.dataset.timeindex(sidx)
if sidx < 0:
sidx = self.tnum + sidx
eidx = self.tnum if k.stop is None else k.stop
if isinstance(eidx, datetime.datetime):
eidx = self.dataset.timeindex(eidx) + 1
if eidx < 0:
eidx = self.tnum + eidx
eidx -= 1
step = 1 if k.step is None else k.step
elif isinstance(k, list):
t_list = []
for t in k:
if isinstance(t, datetime.datetime):
t_list.append(self.dataset.timeindex(t))
else:
t_list.append(t)
times = []
data = None
if t_list is None:
sfidx = self.dataset.datafileindex(sidx)
si = sidx
aa = None
for i in range(sidx, eidx + 1, step):
times.append(miutil.date2num(self.dataset.gettime(i)))
fidx = self.dataset.datafileindex(i)
if fidx > sfidx:
ei = i - step
ddf = self.dataset[sfidx]
var = ddf[self.name]
ii, ssi = self.dataset.dftindex(si)
ii, eei = self.dataset.dftindex(ei)
eei += 1
nindices = list(indices)
nindices[0] = slice(ssi, eei, step)
nindices = tuple(nindices)
aa = var.__getitem__(nindices)
if si == ei:
if isinstance(aa, np.DimArray):
aa.addtdim(self.dataset.gettime(si))
else:
aa = np.array([aa])
aa = np.DimArray(aa)
aa.addtdim(self.dataset.gettime(si))
if data is None:
data = aa
else:
data = np.concatenate([data, aa])
si = i
sfidx = fidx
if si < eidx + 1:
ei = eidx + 1 - step
ddf = self.dataset[sfidx]
var = ddf[self.name]
ii, ssi = self.dataset.dftindex(si)
ii, eei = self.dataset.dftindex(ei)
eei += 1
nindices = list(indices)
nindices[0] = slice(ssi, eei, step)
nindices = tuple(nindices)
aa = var.__getitem__(nindices)
if si == ei and eidx != sidx:
if isinstance(aa, np.DimArray):
aa.addtdim(self.dataset.gettime(si))
else:
aa = np.array([aa])
aa = np.DimArray(aa)
aa.addtdim(self.dataset.gettime(si))
if data is None:
data = aa
else:
data = np.concatenate([data, aa])
if aa is None:
sfidx = self.dataset.datafileindex(sidx)
ddf = self.dataset[sfidx]
var = ddf[self.name]
ii, ssi = self.dataset.dftindex(sidx)
nindices = list(indices)
nindices[0] = slice(ssi, ssi, step)
nindices = tuple(nindices)
aa = var.__getitem__(nindices)
return aa
else:
for i in t_list:
times.append(miutil.date2num(self.dataset.gettime(i)))
fidx, ssi = self.dataset.dftindex(i)
ddf = self.dataset[fidx]
var = ddf[self.name]
nindices = list(indices)
nindices[0] = ssi
nindices = tuple(nindices)
aa = var.__getitem__(nindices)
aa.addtdim(self.dataset.gettime(i))
if data is None:
data = aa
else:
data = np.concatenate([data, aa])
if isinstance(data, np.DimArray):
return data
else:
if isinstance(aa, numbers.Number):
return aa
dims = aa.dims
dims[0].setDimValues(times)
r = np.DimArray(data, dims, aa.fill_value, aa.proj)
return r