871 lines
28 KiB
Python

#-----------------------------------------------------
# Author: Yaqiang Wang
# Date: 2014-12-27
# Purpose: MeteoInfo Dataset module
# Note: Jython
#-----------------------------------------------------
from org.meteoinfo.data.meteodata import MeteoDataType
from org.meteoinfo.data.meteodata.netcdf import NCUtil
from org.meteoinfo.data.dimarray import DimensionType, Dimension
from ucar.ma2 import DataType as NCDataType
from ucar.nc2 import Attribute as NCAttribute
from ucar.nc2 import Variable as NCVariable
from dimvariable import DimVariable, TDimVariable
from mipylib.geolib.milayer import MILayer, MIXYListData
from mipylib.dataframe.dataframe import DataFrame
import mipylib.miutil as miutil
import mipylib.numeric as np
from mipylib.numeric.core._dtype import DataType
from .ncutil import to_dtype
import datetime
from java.lang import Float
import jarray
# Dimension dataset
class DimDataFile(object):
# dataset must be org.meteoinfo.data.meteodata.MeteoDataInfo
def __init__(self, dataset=None, access='r', ncfile=None, arldata=None, bufrdata=None):
self.dataset = dataset
self.access = access
self.ncfile = ncfile
self._variables = []
if not dataset is None:
self.filename = dataset.getFileName()
for v in dataset.getDataInfo().getVariables():
self._variables.append(DimVariable(v))
self.nvar = dataset.getDataInfo().getVariableNum()
self.fill_value = dataset.getMissingValue()
self.proj = dataset.getProjectionInfo()
self.arldata = arldata
self.bufrdata = bufrdata
def __getitem__(self, key):
if isinstance(key, basestring):
var = self.dataset.getDataInfo().getVariable(key)
if var is None:
print key + ' is not a variable name'
raise ValueError()
else:
return DimVariable(self.dataset.getDataInfo().getVariable(key), self)
else:
print key + ' is not a variable name'
raise ValueError()
def __str__(self):
if self.dataset is None:
return 'None'
return self.dataset.getInfoText()
def __repr__(self):
if self.dataset is None:
return 'None'
return self.dataset.getInfoText()
def close(self):
'''
Close the opended dataset
'''
if not self.dataset is None:
self.dataset.close()
elif not self.ncfile is None:
self.ncfile.close()
elif not self.arldata is None:
self.arldata.closeDataFile()
elif not self.bufrdata is None:
self.bufrdata.closeDataFile()
def reopen(self):
"""
Reopen the data file
"""
self.dataset.getDataInfo().reOpen()
def dimensions(self):
'''
Get dimensions
'''
return self.dataset.getDataInfo().getDimensions()
def finddim(self, name):
'''
Find a dimension by name
:name: (*string*) Dimension name
'''
for dim in self.dataset.getDataInfo().getDimensions():
if name == dim.getShortName():
return dim
return None
def attributes(self):
'''
Get global attributes.
'''
return self.dataset.getDataInfo().getGlobalAttributes()
def attrvalue(self, attr):
'''
Get a global attribute value.
:param attr: (*string or Attribute*) Attribute or Attribute name
'''
if isinstance(attr, str):
attr = self.dataset.getDataInfo().findGlobalAttribute(attr)
if attr is None:
return None
v = np.array(attr.getValues())
return v
def variables(self):
'''
Get all variables.
'''
return self._variables
def varnames(self):
'''
Get all variable names.
'''
return self.dataset.getDataInfo().getVariableNames()
def read(self, varname, origin=None, size=None, stride=None):
'''
Read data array from a variable.
:varname: (*string*) Variable name
'''
if origin is None:
return self.dataset.read(varname)
else:
return self.dataset.read(varname, origin, size, stride)
def dump(self):
'''
Print data file information
'''
print self.dataset.getInfoText()
def read_dataframe(self, tidx=None):
'''
Read data frame from dataset.
:param tidx: (*int*) Time index. Default is ``None``.
:returns: (*DataFrame*) The DataFrame.
'''
if tidx is None:
df = self.dataset.getDataInfo().readDataFrame()
else:
df = self.dataset.getDataInfo().readDataFrame(tidx)
return DataFrame(dataframe=df)
def read_table(self):
'''
Read data table from dataset.
'''
dt = self.dataset.getDataInfo().readTable()
return np.datatable(dt)
def griddata(self, varname='var', timeindex=0, levelindex=0, yindex=None, xindex=None):
if self.dataset.isGridData():
self.dataset.setTimeIndex(timeindex)
self.dataset.setLevelIndex(levelindex)
gdata = PyGridData(self.dataset.getGridData(varname))
return gdata
else:
return None
def stationdata(self, varname='var', timeindex=0, levelindex=0):
if self.dataset.isStationData():
self.dataset.setTimeIndex(timeindex)
self.dataset.setLevelIndex(levelindex)
sdata = PyStationData(self.dataset.getStationData(varname))
return sdata
else:
return None
def stinfodata(self):
'''
Get station info data
'''
if self.dataset.isStationData():
sidata = self.dataset.getStationInfoData()
return sidata
else:
return None
def smodeldata(self, timeindex=0, levelindex=0):
'''
Get station model data.
'''
if self.dataset.isStationData():
self.dataset.setTimeIndex(timeindex)
self.dataset.setLevelIndex(levelindex)
smdata = self.dataset.getStationModelData()
return smdata
else:
return None
def trajlayer(self):
'''
Create trajectory polyline layer.
'''
if self.dataset.isTrajData():
return MILayer(self.dataset.getDataInfo().createTrajLineLayer())
else:
return None
def trajplayer(self):
'''
Create trajectory point layer.
'''
if self.dataset.isTrajData():
return MILayer(self.dataset.getDataInfo().createTrajPointLayer())
else:
return None
def trajsplayer(self):
'''
Create trajectory start point layer.
'''
if self.dataset.isTrajData():
return MILayer(self.dataset.getDataInfo().createTrajStartPointLayer())
else:
return None
def trajvardata(self, varidx, hourx=False):
'''
Get trajectory variable data.
'''
if self.dataset.isTrajData():
if hourx:
return MIXYListData(self.dataset.getDataInfo().getXYDataset_HourX(varidx))
else:
return MIXYListData(self.dataset.getDataInfo().getXYDataset(varidx))
else:
return None
def timenum(self):
"""
Get time dimension length
:returns: (*int*) Time dimension length.
"""
return self.dataset.getDataInfo().getTimeNum()
def gettime(self, idx):
"""
Get time by index.
:param idx: (*int*) Time index.
:returns: (*datetime*) The time
"""
t = self.dataset.getDataInfo().getTimes().get(idx)
if t is None:
return None
t = miutil.pydate(t)
return t
def gettimes(self):
"""
Get time list.
"""
tt = self.dataset.getDataInfo().getTimes()
if tt is None:
return None
times = []
for t in tt:
times.append(miutil.pydate(t))
return times
def bigendian(self, big_endian):
'''
Set dataset as big_endian or little_endian. Only for GrADS binary data.
:param big_endian: (*boolean*) Big endian or not.
'''
datatype = self.dataset.getDataInfo().getDataType()
if datatype.isGrADS() or datatype == MeteoDataType.HYSPLIT_Conc:
self.dataset.getDataInfo().setBigEndian(big_endian)
def tostation(self, varname, x, y, z, t):
'''
Interpolate data to a point.
'''
if isinstance(t, datetime.datetime):
t = miutil.jdate(t)
if z is None:
return self.dataset.toStation(varname, x, y, t)
else:
return self.dataset.toStation(varname, x, y, z, t)
####################################################################
#Write netCDF data
def adddim(self, dimname, dimsize, group=None):
'''
Add a dimension.
:param dimname: (*string*) Dimension name.
:param dimsize: (*int*) Dimension size.
:param group: None means global dimension.
'''
return self.ncfile.addDimension(group, dimname, dimsize)
def addgroupattr(self, attrname, attrvalue, group=None, float=False):
'''
Add a global attribute.
:param attrname: (*string*) Attribute name.
:param attrvalue: (*object*) Attribute value.
:param group: None means global attribute.
:param float: (*boolean*) Transfer data as float or not.
'''
if float:
if isinstance(attrvalue, (list, tuple)):
for i in range(len(attrvalue)):
attrvalue[i] = Float(attrvalue[i])
else:
attrvalue = Float(attrvalue)
if isinstance(attrvalue, np.NDArray):
attrvalue = NCUtil.convertArray(attrvalue._array)
return self.ncfile.addGroupAttribute(group, NCAttribute(attrname, attrvalue))
def __getdatatype(self, datatype):
if isinstance(datatype, str):
if datatype == 'string':
dt = NCDataType.STRING
elif datatype == 'int':
dt = NCDataType.INT
elif datatype == 'long':
dt = NCDataType.LONG
elif datatype == 'float':
dt = NCDataType.FLOAT
elif datatype == 'double':
dt = NCDataType.DOUBLE
elif datatype == 'char':
dt = NCDataType.CHAR
else:
dt = NCDataType.STRING
return dt
else:
if isinstance(datatype, DataType):
datatype = NCUtil.convertDataType(datatype._dtype)
return datatype
def addvar(self, varname, datatype, dims, attrs=None, group=None):
'''
Add a variable.
:param varname: (*string*) Variable name.
:param datatype: (*string*) Data type [string | int | long | float | double |
char].
:param dims: (*list*) Dimensions.
:param attrs: (*dict*) Attributes.
'''
dt = self.__getdatatype(datatype)
if len(dims) > 0:
if isinstance(dims[0], Dimension):
ncdims = []
for dim in dims:
ncdims.append(self.ncfile.findDimension(dim.getName()))
var = DimVariable(ncvariable=self.ncfile.addVariable(group, varname, dt, ncdims))
else:
var = DimVariable(ncvariable=self.ncfile.addVariable(group, varname, dt, dims))
else:
var = DimVariable(ncvariable=self.ncfile.addVariable(group, varname, dt, dims))
if not attrs is None:
for key in attrs:
var.addattr(key, attrs[key])
return var
def create(self):
'''
Create a netCDF data file according the settings of dimensions, global attributes
and variables
'''
self.ncfile.create()
def nc_define(self, dims, gattrs, vars, write_dimvars=True):
'''
Define dimensions, global attributes, variables of the netcdf file
:param dims: (*list of Dimension*) The dimensions
:param gattrs: (*list of Attribute*) The global attributes
:param vars: (*list of DimVariable*) The variables
:param write_dimvars: (*bool*) Write dimension variables value or not. Default is ``True``
'''
#Add dimensions
ncdims = []
for dim in dims:
ncdims.append(self.adddim(dim.getName(), dim.getLength()))
#Add global attributes
if not gattrs is None:
for key in gattrs:
self.addgroupattr(key, gattrs[key])
#Add dimension variables
dimvars = []
wdims = []
if write_dimvars:
for dim,midim in zip(ncdims,dims):
dimtype = midim.getDimType()
dimname = dim.getShortName()
if dimtype == DimensionType.T:
var = self.addvar(dimname, 'int', [dim])
var.addattr('units', 'hours since 1900-01-01 00:00:0.0')
var.addattr('long_name', 'Time')
var.addattr('standard_name', 'time')
var.addattr('axis', 'T')
tvar = var
elif dimtype == DimensionType.Z:
var = self.addvar(dimname, 'float', [dim])
var.addattr('axis', 'Z')
elif dimtype == DimensionType.Y:
var = self.addvar(dimname, 'float', [dim])
var.addattr('axis', 'Y')
elif dimtype == DimensionType.X:
var = self.addvar(dimname, 'float', [dim])
var.addattr('axis', 'X')
else:
var = None
if not var is None:
dimvars.append(var)
wdims.append(midim)
#Add variables
for v in vars:
var = self.addvar(v.name, v.dtype, v.dims)
v.ncvariable = var
if v.attributes is None:
var.addattr('name', v.name)
else:
for attr in v.attributes:
var.addattr(attr.getName(), attr.getValue())
#Create netCDF file
self.ncfile.create()
#Write dimension variable data
if write_dimvars:
for dimvar, dim in zip(dimvars, wdims):
if dim.getDimType() == DimensionType.T:
sst = datetime.datetime(1900,1,1)
tt = miutil.nums2dates(dim.getDimValue())
hours = []
for t in tt:
hours.append((t - sst).total_seconds() // 3600)
self.write(dimvar, np.array(hours))
else:
self.write(dimvar, np.array(dim.getDimValue()))
def write(self, variable, value, origin=None):
'''
Write variable value.
:param variable: (*Variable*) Variable object.
:param value: (*array_like*) Data array to be write.
:param origin: (*list*) Dimensions origin indices. None means all from 0.
'''
if isinstance(value, (list, tuple)):
value = np.array(value)
if isinstance(variable, DimVariable):
if self.access == 'c':
ncvariable = variable.ncvariable
if ncvariable is None:
ncvariable = self.ncfile.findVariable(variable.name)
if ncvariable is None:
ncvariable = variable.name
else:
ncvariable = self.dataset.getDataInfo().findNCVariable(variable.name)
elif isinstance(variable, basestring):
ncvariable = self.ncfile.findVariable(variable)
else:
ncvariable = variable
if isinstance(value, np.NDArray) and isinstance(ncvariable, NCVariable):
dtype = to_dtype(ncvariable.getDataType())
if value.dtype != dtype:
value = value.astype(dtype)
value = NCUtil.convertArray(value._array)
if origin is None:
self.ncfile.write(ncvariable, value)
else:
origin = jarray.array(origin, 'i')
self.ncfile.write(ncvariable, origin, value)
def flush(self):
'''
Flush the data.
'''
self.ncfile.flush()
def largefile(self, islarge=True):
'''
Set the netCDF file is large file (more than 2G) nor not.
:param islarge: (*boolean*) Is large file or not.
'''
self.ncfile.setLargeFile(islarge)
##################################################################
# Write ARL data
def setx(self, x):
'''
Set x (longitude) dimension value.
:param x: (*array_like*) X dimension value.
'''
self.arldata.setX(x.aslist())
def sety(self, y):
'''
Set y (latitude) dimension value.
:param y: (*array_like*) Y dimension value.
'''
self.arldata.setY(y.aslist())
def setlevels(self, levels):
'''
Set vertical levels.
:param leveles: (*list*) Vertical levels.
'''
if isinstance(levels, np.NDArray):
levels = levels.aslist()
if levels[0] != 1:
levels.insert(0, 1)
self.arldata.levels = levels
def set2dvar(self, vnames):
'''
Set surface variables (2 dimensions ignore time dimension).
:param vnames: (*list*) Variable names.
'''
self.arldata.LevelVarList.add(vnames)
def set3dvar(self, vnames):
'''
Set level variables (3 dimensions ignore time dimension).
:param vnames: (*list*) Variable names.
'''
self.arldata.LevelVarList.add(vnames)
def getdatahead(self, proj, model, vertical, icx=0, mn=0):
'''
Get data head.
:param proj: (*ProjectionInfo*) Projection information.
:param model: (*string*) Model name with 4 characters.
:param vertical: (*int*) Vertical coordinate system flag. 1-sigma (fraction);
2-pressure (mb); 3-terrain (fraction); 4-hybrid (mb: offset.fraction)
:param icx: (*int*) Forecast hour (>99 the header forecast hr = 99)
:param mn: (*int*) Minutes associated with data time.
'''
return self.arldata.getDataHead(proj, model, vertical, icx, mn)
def diff_origin_pack(self, data):
'''
Get difference between the original data and the packed data.
:param data: (*array*) The original data.
:return: (*array*) Difference.
'''
r = self.arldata.diffOriginPack(data._array)
return np.NDArray(r)
def writeindexrec(self, t, datahead, ksums=None):
'''
Write index record.
:param t: (*datatime*) The time of the data.
:param datahead: (*DataHeader') Data header of the record.
:param ksums: (*list*) Check sum list.
'''
t = miutil.jdate(t)
self.arldata.writeIndexRecord(t, datahead, ksums)
def writedatarec(self, t, lidx, vname, fhour, grid, data):
'''
Write data record.
:param t: (*datetime*) The time of the data.
:param lidx: (*int*) Level index.
:param vname: (*string*) Variable name.
:param fhour: (*int*) Forecasting hour.
:param grid: (*int*) Grid id to check if the data grid is bigger than 999. Header
record does not support grids of more than 999, therefore in those situations
the grid number is converted to character to represent the 1000s digit,
e.g. @(64)=<1000, A(65)=1000, B(66)=2000, etc.
:param data: (*array_like*) Data array.
:returns: (*int*) Check sum of the record data.
'''
t = miutil.jdate(t)
ksum = self.arldata.writeGridData(t, lidx, vname, fhour, grid, data.asarray())
return ksum
########################################################################
# Write Bufr data
def write_indicator(self, bufrlen, edition=3):
'''
Write indicator section with arbitrary length.
:param bufrlen: (*int*) The total length of the message.
:param edition: (*int*) Bruf edition.
:returns: (*int*) Indicator section length.
'''
return self.bufrdata.writeIndicatorSection(bufrlen, edition)
def rewrite_indicator(self, bufrlen, edition=3):
'''
Write indicator section with correct length.
:param bufrlen: (*int*) The total length of the message.
:param edition: (*int*) Bruf edition.
'''
self.bufrdata.reWriteIndicatorSection(bufrlen, edition)
def write_identification(self, **kwargs):
'''
Write identification section.
:param length: (*int*) Section length
:param master_table: (*int*) Master table
:param subcenter_id: (*int*) Subcenter id
:param center_id: (*int*) Center id
:param update: (*int*) Update sequency
:param optional: (*int*) Optional
:param category: (*int*) Category
:param sub_category: (*int*) Sub category
:param master_table_version: (*int*) Master table version
:param local_table_version: (*int*) Local table version
:param year: (*int*) Year
:param month: (*int*) Month
:param day: (*int*) Day
:param hour: (*int*) Hour
:param minute: (*int*) Minute
:returns: (*int*) Section length
'''
length = kwargs.pop('length', 18)
master_table = kwargs.pop('master_table', 0)
subcenter_id = kwargs.pop('subcenter_id', 0)
center_id = kwargs.pop('center_id', 74)
update = kwargs.pop('update', 0)
optional = kwargs.pop('optional', 0)
category = kwargs.pop('category', 7)
sub_category = kwargs.pop('sub_category', 0)
master_table_version = kwargs.pop('master_table_version', 11)
local_table_version = kwargs.pop('local_table_version', 1)
year = kwargs.pop('year', 2016)
month = kwargs.pop('month', 1)
day = kwargs.pop('day', 1)
hour = kwargs.pop('hour', 0)
minute = kwargs.pop('minute', 0)
return self.bufrdata.writeIdentificationSection(length, master_table, subcenter_id, center_id,\
update, optional, category, sub_category, master_table_version,\
local_table_version, year, month, day, hour, minute)
def write_datadescription(self, n, datatype, descriptors):
'''
Write data description section
:param n: (*int*) Numer of dataset.
:param datatype: (*int*) Data type.
:param descriptors: (*list*) Data descriptors.
'''
return self.bufrdata.writeDataDescriptionSection(n, datatype, descriptors)
def write_datahead(self, len):
'''
Write data header with arbitrary data length.
:param len: (*int*) Data section length.
:returns: (*int*) Data section head length - always 4.
'''
return self.bufrdata.writeDataSectionHead(len)
def rewrite_datahead(self, len):
'''
Write data header with correct data length.
:param len: (*int*) Data section length.
'''
self.bufrdata.reWriteDataSectionHead(len)
def write_data(self, value, nbits=None):
'''
Write data.
:param value: (*int*) Value.
:param nbits: (*int*) Bit number.
:returns: (*int*) Data value length.
'''
return self.bufrdata.write(value, nbits)
def write_end(self):
'''
Write end section ('7777').
:returns: (*int*) End section length - always 4.
'''
return self.bufrdata.writeEndSection()
#*********************************************
# Created by addfiles function in midata module - multiple data files with difference only
# on time dimension.
class DimDataFiles(list):
# dataset must be list of DimDataFile
def __init__(self, dataset=[]):
list.__init__([])
ndataset = []
ftimes = []
for ds in dataset:
if len(ndataset) == 0:
ndataset.append(ds)
ftimes.append(ds.gettime(0))
else:
idx = len(ndataset)
ftime = ds.gettime(0)
for i in range(len(ndataset)):
if ftime < ftimes[i]:
idx = i
break
ndataset.insert(idx, ds)
ftimes.insert(idx, ftime)
self.extend(ndataset)
self.times = []
self.tnums = []
self.tnum = 0
for ds in ndataset:
tts = ds.gettimes()
self.times.extend(tts)
self.tnums.append(len(tts))
self.tnum += len(tts)
def append(self, ddf):
self.append(ddf)
tts = ddf.gettimes()
self.times.extend(tts)
self.tnums.append(len(tts))
self.tnum += len(tts)
def __getitem__(self, key):
if isinstance(key, str):
#print key
return TDimVariable(self[0].dataset.getDataInfo().getVariable(key), self)
else:
return list.__getitem__(self, key)
def filenames(self):
'''
Get file names.
:returns: File name list
'''
fns = []
for df in self:
fns.append(df.filename)
return fns
def datafileindex(self, t):
"""
Get data file by time
:param t: (*datetime or idx*) Time value of index.
:returns: (*int*) Data file index
"""
if isinstance(t, datetime.datetime):
t = self.timeindex(t)
nn = 0
idx = 0
for n in self.tnums:
nn += n
if t < nn:
break
idx += 1
return idx
def datafile(self, t):
"""
Get data file by time
:param t: (*datetime or idx*) Time value of index.
:returns: (*DimDataFile*) Data file
"""
idx = self.datafileindex(t)
return self[idx]
def dftindex(self, t):
'''
Get data file index and time index of it.
:param t: (*datetime or idx*) Time value of index.
:returns: (*list of int*) Data file index and time index of it.
'''
if isinstance(t, datetime.datetime):
t = self.timeindex(t)
nn = 0
dfidx = 0
tidx = 0
sn = 0
for n in self.tnums:
nn += n
if t < nn:
tidx = t - sn
break
dfidx += 1
sn = nn
return dfidx, tidx
def timeindex(self, t):
'''
Get time index.
:param t: (*datetime*) Given time
:returns: (*int*) Time index
'''
idx = 0
for tt in self.times:
if t <= tt:
break
idx += 1
return idx if idx < self.tnum else idx - 1
def gettime(self, idx):
'''
Get time by index.
:param idx: (*int*) Time index.
:returns: (*datetime*) The time
'''
return self.times[idx]
def varnames(self):
'''
Get variable names
'''
return self[0].varnames()
#############################################