diff --git a/meteoinfo-dataframe/src/main/java/org/meteoinfo/dataframe/DataFrame.java b/meteoinfo-dataframe/src/main/java/org/meteoinfo/dataframe/DataFrame.java index f1e2ed2f..d76e9c09 100644 --- a/meteoinfo-dataframe/src/main/java/org/meteoinfo/dataframe/DataFrame.java +++ b/meteoinfo-dataframe/src/main/java/org/meteoinfo/dataframe/DataFrame.java @@ -2024,6 +2024,115 @@ public class DataFrame implements Iterable { } } + /** + * Reindex by row and column ranges + * + * @param rowKeys Row keys + * @param rowRange Row range + * @param colRange Column indices + * @param colNames Column names + * @return Reindex data frame or series + * @throws org.meteoinfo.ndarray.InvalidRangeException + */ + public Object reIndex(List rowKeys, List rowRange, List colRange, + List colNames) throws InvalidRangeException { + ColumnIndex cols = new ColumnIndex(); + int idx = 0; + for (int i : colRange) { + if (i >= 0) { + cols.add(this.columns.get(i)); + } else { + cols.add(new Column(colNames.get(idx), DataType.FLOAT)); + } + idx += 1; + } + + Object r; + if (this.array2D) { + int n = ((Array) data).getShape()[1]; + int rn = rowRange.size(); + int cn = colRange.size(); + DataType dtype = ((Array) data).getDataType(); + r = Array.factory(dtype, new int[]{rn, cn}); + String format = this.columns.get(0).getFormat(); + Object v = DataTypeUtil.convertTo(null, dtype, format); + idx = 0; + int jj = 0; + for (int j : colRange) { + if (j < 0) { + int ii = 0; + for (int i : rowRange) { + idx = ii * cn + jj; + ((Array) r).setObject(idx, v); + ii += 1; + } + } else { + int ii = 0; + for (int i : rowRange) { + idx = ii * cn + jj; + if (i < 0) { + ((Array) r).setObject(idx, v); + } else { + ((Array) r).setObject(idx, ((Array) data).getObject(i * n + j)); + } + ii += 1; + } + } + jj += 1; + } + } else { + r = new ArrayList<>(); + int rn = rowRange.size(); + for (int j : colRange) { + Array rr; + idx = 0; + if (j < 0) { + rr = Array.factory(DataType.FLOAT, new int[]{rn}); + for (int i : rowRange) { + 
rr.setObject(idx, Float.NaN); + idx += 1; + } + } else { + DataType dtype = this.columns.get(j).getDataType(); + String format = this.columns.get(j).getFormat(); + rr = Array.factory(dtype, new int[]{rn}); + Array mr = ((List) this.data).get(j); + Object v = DataTypeUtil.convertTo(null, dtype, format); + for (int i : rowRange) { + if (i < 0) { + rr.setObject(idx, v); + } else { + rr.setObject(idx, mr.getObject(i)); + } + idx += 1; + } + } + ((ArrayList) r).add(rr); + } + if (cols.size() == 1) { + r = ((ArrayList) r).get(0); + } + } + + if (r == null) { + return null; + } else { + Index rIndex = Index.factory(rowKeys); + if (cols.size() == 1 && this.columns.size() > 1) { + Series s = new Series((Array) r, rIndex, cols.get(0).getName()); + return s; + } else { + DataFrame df; + if (r instanceof Array) { + df = new DataFrame((Array) r, rIndex, cols); + } else { + df = new DataFrame((ArrayList) r, rIndex, cols); + } + return df; + } + } + } + /** * Extract DataFrame by row and column ranges * diff --git a/meteoinfo-lab/milconfig.xml b/meteoinfo-lab/milconfig.xml index 8fe14af0..80021901 100644 --- a/meteoinfo-lab/milconfig.xml +++ b/meteoinfo-lab/milconfig.xml @@ -34,5 +34,5 @@
def reindex(self, index=None, columns=None, axis=None):
    """
    Conform DataFrame to new index with optional filling logic.

    :param index: (*array-like or slice*) New labels for the index. ``None`` keeps all
        rows. A slice selects the existing rows between its start and stop labels.
    :param columns: (*list, str or slice*) New labels for the columns. ``None`` keeps all
        columns.
    :param axis: (*int or str*) Axis to target. Can be either the axis name ('index',
        'columns') or number (0, 1). Accepted but not used by this implementation.

    :return: Series or DataFrame with changed index, or ``None`` when ``columns`` is of
        an unsupported type.

    :raises KeyError: If a slice bound or a single column name is not found, or if the
        row indexer is empty.
    """
    if index is None:
        index = slice(None)

    # Resolve the requested rows to positions in the current index.
    if isinstance(index, slice):
        sidx = 0 if index.start is None else self._index.index(index.start)
        if sidx < 0:
            raise KeyError(index.start)
        eidx = self.shape[0] - 1 if index.stop is None else self._index.index(index.stop)
        if eidx < 0:
            raise KeyError(index.stop)
        step = 1 if index.step is None else index.step
        rowkey = Range(sidx, eidx, step)
    else:
        rowkey = self._index.get_indexer(index)
        if len(rowkey) == 0:
            raise KeyError(index)

    # Resolve the requested columns to positions in the current columns.
    if columns is None:
        colkey = range(0, self.shape[1], 1)
    elif isinstance(columns, slice):
        sidx = 0 if columns.start is None else self.columns.indexOfName(columns.start)
        if sidx < 0:
            raise KeyError(columns.start)
        eidx = self.shape[1] - 1 if columns.stop is None else self.columns.indexOfName(columns.stop)
        if eidx < 0:
            raise KeyError(columns.stop)
        step = 1 if columns.step is None else columns.step
        colkey = Range(sidx, eidx, step)
    elif isinstance(columns, list):
        colkey = self.columns.indexOfName(columns)
    elif isinstance(columns, basestring):
        col = self.columns.indexOfName(columns)
        if col < 0:
            raise KeyError(columns)
        colkey = [col]
    else:
        return None

    if isinstance(rowkey, (int, Range)):
        # Rows came from a slice: every row exists, so a plain selection is enough.
        r = self._dataframe.select(rowkey, colkey)
    else:
        rkeys = index if isinstance(index, list) else [index]
        # reIndex reads a name from this list for each column position that is negative.
        if columns is None:
            colnames = self.columns.names
        elif isinstance(columns, basestring):
            colnames = [columns]
        else:
            # NOTE(review): a slice for `columns` reaches here unchanged together with a
            # Range colkey - confirm that reIndex accepts both.
            colnames = columns
        r = self._dataframe.reIndex(rkeys, rowkey, colkey, colnames)

    if r is None:
        return None

    if isinstance(r, MISeries):
        return series.Series(series=r)
    return DataFrame(dataframe=r)
def get_indexer(self, key):
    """
    Compute the indexer for a new index given the current index.

    :param key: New index labels. An ``np.NDArray`` is unwrapped with ``asarray()``
        before the lookup; any other value is passed through unchanged.

    :return: (*list*) Third element of the underlying ``getIndices`` result, converted
        to a list. Presumably the integer position of each target label in the current
        index, with -1 marking labels that are missing - confirm against ``getIndices``.
    """
    target = key.asarray() if isinstance(key, np.NDArray) else key
    found = self._index.getIndices(target)
    positions = found[2]
    return list(positions)