add reindex method in DataFrame class

This commit is contained in:
wyq 2023-04-18 10:35:41 +08:00
parent ef5cae235b
commit 9e55114aa8
6 changed files with 208 additions and 1 deletions

View File

@ -2024,6 +2024,115 @@ public class DataFrame implements Iterable {
}
}
/**
 * Reindex this data frame by row and column ranges.
 * <p>
 * A negative entry in {@code rowRange} or {@code colRange} marks a row or column
 * that has no source in this data frame. Cells for such rows/columns are filled
 * with a missing value: the result of {@code DataTypeUtil.convertTo(null, ...)}
 * for existing columns, or {@code Float.NaN} for new columns of list-backed data.
 *
 * @param rowKeys Row keys used to build the index of the result
 * @param rowRange Source row positions, one per result row (negative: no source row)
 * @param colRange Source column positions, one per result column (negative: new column)
 * @param colNames Column names parallel to {@code colRange}; only read for new columns
 * @return A series when the result has exactly one column and this data frame has
 * more than one column, otherwise a data frame; {@code null} if no data was built
 * @throws org.meteoinfo.ndarray.InvalidRangeException
 */
public Object reIndex(List rowKeys, List<Integer> rowRange, List<Integer> colRange,
        List<String> colNames) throws InvalidRangeException {
    // Assemble the column index of the result: reuse existing columns and
    // create FLOAT columns for positions that have no source column.
    ColumnIndex cols = new ColumnIndex();
    for (int pos = 0; pos < colRange.size(); pos++) {
        int srcCol = colRange.get(pos);
        if (srcCol < 0) {
            cols.add(new Column(colNames.get(pos), DataType.FLOAT));
        } else {
            cols.add(this.columns.get(srcCol));
        }
    }

    Object result;
    if (this.array2D) {
        // Data is a single 2D array addressed through flat (row * width + col) offsets.
        Array source = (Array) data;
        int sourceWidth = source.getShape()[1];
        int rowCount = rowRange.size();
        int colCount = colRange.size();
        DataType dtype = source.getDataType();
        Array target = Array.factory(dtype, new int[]{rowCount, colCount});
        String format = this.columns.get(0).getFormat();
        Object missing = DataTypeUtil.convertTo(null, dtype, format);
        for (int tc = 0; tc < colCount; tc++) {
            int srcCol = colRange.get(tc);
            for (int tr = 0; tr < rowCount; tr++) {
                int srcRow = rowRange.get(tr);
                int offset = tr * colCount + tc;
                if (srcCol < 0 || srcRow < 0) {
                    target.setObject(offset, missing);
                } else {
                    target.setObject(offset, source.getObject(srcRow * sourceWidth + srcCol));
                }
            }
        }
        result = target;
    } else {
        // Data is a list holding one 1D array per column.
        int rowCount = rowRange.size();
        ArrayList<Array> arrays = new ArrayList<>();
        for (int srcCol : colRange) {
            Array column;
            if (srcCol < 0) {
                // New column: FLOAT array filled with NaN.
                column = Array.factory(DataType.FLOAT, new int[]{rowCount});
                for (int tr = 0; tr < rowCount; tr++) {
                    column.setObject(tr, Float.NaN);
                }
            } else {
                DataType dtype = this.columns.get(srcCol).getDataType();
                String format = this.columns.get(srcCol).getFormat();
                column = Array.factory(dtype, new int[]{rowCount});
                Array source = ((List<Array>) this.data).get(srcCol);
                Object missing = DataTypeUtil.convertTo(null, dtype, format);
                for (int tr = 0; tr < rowCount; tr++) {
                    int srcRow = rowRange.get(tr);
                    column.setObject(tr, srcRow < 0 ? missing : source.getObject(srcRow));
                }
            }
            arrays.add(column);
        }
        // A single result column is unwrapped to its bare array.
        if (cols.size() == 1) {
            result = arrays.get(0);
        } else {
            result = arrays;
        }
    }

    if (result == null) {
        return null;
    }

    Index rIndex = Index.factory(rowKeys);
    if (cols.size() == 1 && this.columns.size() > 1) {
        return new Series((Array) result, rIndex, cols.get(0).getName());
    }
    if (result instanceof Array) {
        return new DataFrame((Array) result, rIndex, cols);
    }
    return new DataFrame((ArrayList) result, rIndex, cols);
}
/**
* Extract DataFrame by row and column ranges
*

View File

@ -34,5 +34,5 @@
</Font>
<LookFeel DockWindowDecorated="true" LafDecorated="true" Name="FlatDarkLaf"/>
<Figure DoubleBuffering="true"/>
<Startup MainFormLocation="-7,0" MainFormSize="1408,779"/>
<Startup MainFormLocation="-7,0" MainFormSize="1393,791"/>
</MeteoInfo>

View File

@ -801,6 +801,88 @@ class DataFrame(object):
ascending = [ascending] * len(by)
df = self._dataframe.sortBy(by, ascending)
return DataFrame(dataframe=df)
def reindex(self, index=None, columns=None, axis=None):
    """
    Conform DataFrame to new index with optional filling logic.

    :param index: (*array-like or slice*) New labels for the index. ``None`` selects all rows.
    :param columns: (*list, str or slice*) New labels for the columns. ``None`` selects all
        columns.
    :param axis: (*int or str*) Axis to target. Accepted in the signature but not used by
        the current implementation.

    :return: Series or DataFrame with changed index. ``None`` is returned when ``columns``
        is of an unsupported type or the underlying data frame returns no result.

    :raises KeyError: If a slice bound or a single column name is not found, or if the
        indexer computed for ``index`` is empty.
    """
    if index is None:
        index = slice(None)

    # Resolve the requested rows to positions in the current index.
    if isinstance(index, slice):
        sidx = 0 if index.start is None else self._index.index(index.start)
        if sidx < 0:
            raise KeyError(index.start)
        eidx = self.shape[0] - 1 if index.stop is None else self._index.index(index.stop)
        if eidx < 0:
            raise KeyError(index.stop)
        step = 1 if index.step is None else index.step
        rowkey = Range(sidx, eidx, step)
    else:
        rowkey = self._index.get_indexer(index)
        if len(rowkey) == 0:
            raise KeyError(index)

    # Resolve the requested columns to positions in the current columns.
    if columns is None:
        colkey = range(0, self.shape[1], 1)
    elif isinstance(columns, slice):
        sidx = 0 if columns.start is None else self.columns.indexOfName(columns.start)
        if sidx < 0:
            raise KeyError(columns.start)
        eidx = self.shape[1] - 1 if columns.stop is None else self.columns.indexOfName(columns.stop)
        if eidx < 0:
            raise KeyError(columns.stop)
        step = 1 if columns.step is None else columns.step
        colkey = Range(sidx, eidx, step)
    elif isinstance(columns, list):
        colkey = self.columns.indexOfName(columns)
    elif isinstance(columns, basestring):
        col = self.columns.indexOfName(columns)
        if col < 0:
            raise KeyError(columns)
        colkey = [col]
        # The Java side declares the column names as a list, so wrap the single name.
        columns = [columns]
    else:
        return None

    if isinstance(rowkey, (int, Range)):
        # Row slice: plain positional selection, no new rows can appear.
        r = self._dataframe.select(rowkey, colkey)
    else:
        # NOTE(review): a non-list index (e.g. an array) is wrapped as a single
        # row key here - confirm this is intended for array-like input.
        rkeys = index if isinstance(index, list) else [index]
        if columns is None:
            columns = self.columns.names
        r = self._dataframe.reIndex(rkeys, rowkey, colkey, columns)

    if r is None:
        return None

    if isinstance(r, MISeries):
        r = series.Series(series=r)
    else:
        r = DataFrame(dataframe=r)
    return r
def groupby(self, by):
"""

View File

@ -129,6 +129,22 @@ class Index(object):
def fill_keylist(self, rdata, rfdata):
    """
    Delegate to ``fillKeyList`` of the wrapped index object.

    :param rdata: Object providing ``asarray()``; the converted array is passed on.
    :param rfdata: Passed through unchanged as the second argument.

    :return: Whatever the wrapped index's ``fillKeyList`` returns.
    """
    values = rdata.asarray()
    return self._index.fillKeyList(values, rfdata)
def get_indexer(self, key):
    """
    Compute the indexer for a new index given the current index.

    :param key: Target index labels. An ``np.NDArray`` is converted with ``asarray()``
        before the lookup; any other value is passed on unchanged.

    :return: (*list*) Integers from 0 to n - 1 indicating that the index at these positions
        matches the corresponding target values. Missing values in the target are marked
        by -1.
    """
    target = key.asarray() if isinstance(key, np.NDArray) else key
    found = self._index.getIndices(target)
    # The third element of the lookup result holds the positions returned to the caller.
    return list(found[2])
def get_format(self):
"""
Get value to string format.