Improve the drop function in DataFrame to support dropping rows

This commit is contained in:
wyq 2022-04-19 12:24:49 +08:00
parent a796e5fbc4
commit e57ad304f5
10 changed files with 284 additions and 39 deletions

View File

@ -24,6 +24,8 @@ import org.meteoinfo.data.dimarray.DimArray;
import org.meteoinfo.ndarray.Array;
import org.meteoinfo.data.dimarray.Dimension;
import org.meteoinfo.ndarray.InvalidRangeException;
import org.meteoinfo.ndarray.Range;
import org.meteoinfo.ndarray.math.ArrayMath;
import org.meteoinfo.projection.KnownCoordinateSystems;
import org.meteoinfo.projection.ProjectionInfo;
@ -576,6 +578,20 @@ import org.meteoinfo.projection.ProjectionInfo;
}
}
/**
 * Read dimension array data for the section described by a range list.
 *
 * <p>Each range is translated into an origin / size / stride entry and the
 * read is delegated to the {@code readDimArray} overload that takes the
 * three section arrays.</p>
 *
 * @param varName Variable name
 * @param ranges Range list, one entry per section axis
 * @return Dimension array
 */
public DimArray readDimArray(String varName, List<Range> ranges) {
    // The section arrays are sized and filled here. Java passes array
    // references by value, so a helper that re-allocates its array
    // parameters cannot hand the results back to this method.
    final int n = ranges.size();
    final int[] origin = new int[n];
    final int[] size = new int[n];
    final int[] stride = new int[n];
    for (int i = 0; i < n; i++) {
        final Range range = ranges.get(i);
        origin[i] = range.first();
        // NOTE(review): this equals the element count only when the stride
        // is 1 - confirm what the section reader expects for strided ranges.
        size[i] = range.last() - range.first() + 1;
        stride[i] = range.stride();
    }
    return readDimArray(varName, origin, size, stride);
}
/**
* Get global attributes
* @return Global attributes

View File

@ -1162,6 +1162,22 @@ public class Variable {
this.addDimension(dim);
}
/**
 * Find the position of the first staggered dimension of this variable.
 *
 * @return Zero-based position of the first dimension for which
 *         {@code isStagger()} is true, or -1 when there is none
 */
public int getStaggerDimIndex() {
    int position = -1;
    for (Dimension candidate : this.dimensions) {
        position++;
        if (candidate.isStagger()) {
            return position;
        }
    }
    // No dimension reported itself as staggered
    return -1;
}
/**
* Add an attribute
*

View File

@ -3,6 +3,7 @@ package org.meteoinfo.data.meteodata.util;
import org.meteoinfo.data.dimarray.DimArray;
import org.meteoinfo.data.dimarray.Dimension;
import org.meteoinfo.data.meteodata.DataInfo;
import org.meteoinfo.data.meteodata.Variable;
import org.meteoinfo.ndarray.Array;
import org.meteoinfo.ndarray.DataType;
import org.meteoinfo.ndarray.InvalidRangeException;
@ -89,4 +90,30 @@ public class WRFUtil {
return new DimArray(gpm, ph.getDimensions());
}
/**
 * Get geopotential 1-D height array - meter.
 *
 * <p>Reads "PH" and "PHB" over the full staggered dimension and a length-1
 * range on every other dimension, de-staggers both, then reads "HGT" with
 * the same ranges minus the second one. The result is
 * {@code (PH + PHB) / 9.81 - HGT}.</p>
 *
 * @param dataInfo The WRF data info
 * @return Geopotential height, carrying the dimensions of the de-staggered PH array
 */
public static DimArray getGPM1D(DataInfo dataInfo) {
    final Variable phVariable = dataInfo.getVariable("PH");

    // Full extent along the staggered dimension, a single element elsewhere
    final List<Range> section = new ArrayList<>();
    for (Dimension dim : phVariable.getDimensions()) {
        final int length = dim.isStagger() ? dim.getLength() : 1;
        section.add(new Range(length));
    }

    final DimArray phData = deStagger(dataInfo.readDimArray("PH", section));
    final DimArray phbData = deStagger(dataInfo.readDimArray("PHB", section));

    // HGT is read with the second range removed.
    // NOTE(review): assumes the staggered dimension sits at position 1 - confirm.
    section.remove(1);
    final DimArray hgtData = dataInfo.readDimArray("HGT", section);

    // 9.81 is presumably gravitational acceleration in m/s^2 - confirm
    final Array gpm = ArrayMath.sub(
            ArrayMath.div(ArrayMath.add(phData.getArray(), phbData.getArray()), 9.81),
            hgtData.getArray());
    return new DimArray(gpm, phData.getDimensions());
}
}

View File

@ -92,7 +92,7 @@ public class ColumnIndex extends Index<Column> {
* @return Indices
*/
@Override
public Integer[] indices(final Object[] names) {
public int[] indices(final Object[] names) {
return indices(Arrays.asList(names));
}
@ -102,9 +102,9 @@ public class ColumnIndex extends Index<Column> {
* @return Indices
*/
@Override
public Integer[] indices(final List<Object> names) {
public int[] indices(final List<Object> names) {
final int size = names.size();
final Integer[] indices = new Integer[size];
final int[] indices = new int[size];
for (int i = 0; i < size; i++) {
indices[i] = indexOfName(names.get(i).toString());
}

View File

@ -25,6 +25,7 @@ import java.time.temporal.TemporalAmount;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
*
@ -1113,7 +1114,7 @@ public class DataFrame implements Iterable {
* @return a shallow copy of the data frame with the columns removed
*/
public DataFrame drop(List<String> colNames) {
return drop(columns.indexOfName(colNames).toArray(new Integer[colNames.size()]));
return drop(columns.indexOfName(colNames).stream().mapToInt(Integer::valueOf).toArray());
}
/**
@ -1132,7 +1133,7 @@ public class DataFrame implements Iterable {
* @param cols the indices of the columns to be removed
* @return a shallow copy of the data frame with the columns removed
*/
public DataFrame drop(final Integer... cols) {
public DataFrame drop(final int... cols) {
final List<String> colnames = new ArrayList<>(columns.getNames());
final List<String> todrop = new ArrayList<>(cols.length);
for (final int col : cols) {
@ -1156,6 +1157,53 @@ public class DataFrame implements Iterable {
}
}
/**
 * Drop rows.
 *
 * <p>The given index labels are resolved to row positions through
 * {@code index.indices(...)}; every row position that is not among them is
 * kept, in its original order.</p>
 *
 * @param dropIndex Rows index for dropping
 * @return Dropped DataFrame
 */
public DataFrame dropRows(List dropIndex) {
    // A hash set keeps the per-row membership check constant-time instead of
    // scanning the whole drop list for every row.
    final Set<Integer> dropped = new HashSet<>();
    for (int position : this.index.indices(dropIndex)) {
        dropped.add(position);
    }

    final int rowCount = this.index.size();
    final List<Integer> rowRange = new ArrayList<>(rowCount);
    for (int i = 0; i < rowCount; i++) {
        if (!dropped.contains(i)) {
            rowRange.add(i);
        }
    }
    return (DataFrame) this.select(rowRange);
}
/**
 * Drop DataFrame by index and columns.
 *
 * <p>When {@code dropIndex} is null only columns are dropped; when
 * {@code dropCols} is null only rows are dropped. Otherwise both label
 * lists are resolved to positions and the remaining rows and columns are
 * extracted into a new DataFrame.</p>
 *
 * @param dropIndex Drop index
 * @param dropCols Drop columns
 * @return Dropped DataFrame
 */
public DataFrame drop(List dropIndex, List dropCols) {
    if (dropIndex == null) {
        return drop(dropCols);
    } else if (dropCols == null) {
        return dropRows(dropIndex);
    }

    final int[] dropRowPositions = this.index.indices(dropIndex);
    final int[] dropColPositions = this.columns.indices(dropCols);
    final List<Integer> rowRange = positionsExcluding(this.index.size(), dropRowPositions);
    final List<Integer> colRange = positionsExcluding(this.columns.size(), dropColPositions);
    return extract(rowRange, colRange);
}

/**
 * List the positions 0..count-1 that are not in the excluded array, in ascending order.
 *
 * @param count Number of positions
 * @param excluded Positions to leave out
 * @return Remaining positions
 */
private static List<Integer> positionsExcluding(int count, int[] excluded) {
    // Hash set lookup avoids a linear scan of the excluded positions per candidate
    final Set<Integer> skip = new HashSet<>();
    for (int position : excluded) {
        skip.add(position);
    }
    final List<Integer> kept = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        if (!skip.contains(i)) {
            kept.add(i);
        }
    }
    return kept;
}
/**
* Remove null or NaN values - any value is null or NaN in a row or column
* @param row Remove rows or columns
@ -1188,7 +1236,7 @@ public class DataFrame implements Iterable {
colRange.add(i);
}
}
return drop(colRange.toArray(new Integer[colRange.size()]));
return drop(colRange.stream().mapToInt(Integer::valueOf).toArray());
}
}
@ -1224,7 +1272,7 @@ public class DataFrame implements Iterable {
colRange.add(i);
}
}
return drop(colRange.toArray(new Integer[colRange.size()]));
return drop(colRange.stream().mapToInt(Integer::valueOf).toArray());
}
}
@ -1256,7 +1304,7 @@ public class DataFrame implements Iterable {
*/
public DataFrame retain(final Integer... cols) {
final Set<Integer> keep = new HashSet<>(Arrays.asList(cols));
final Integer[] todrop = new Integer[size() - keep.size()];
final int[] todrop = new int[size() - keep.size()];
for (int i = 0, c = 0; c < size(); c++) {
if (!keep.contains(c)) {
todrop[i++] = c;
@ -1510,9 +1558,8 @@ public class DataFrame implements Iterable {
*
* @param rowRange Row range
* @return Selected data frame or series
* @throws org.meteoinfo.ndarray.InvalidRangeException
*/
public Object select(List<Integer> rowRange) throws InvalidRangeException {
public Object select(List<Integer> rowRange) {
ColumnIndex cols = new ColumnIndex();
for (int i = 0; i < this.size(); i ++) {
cols.add(this.columns.get(i));
@ -1522,7 +1569,7 @@ public class DataFrame implements Iterable {
if (this.array2D) {
List ranges = new ArrayList<>();
ranges.add(rowRange);
ranges.add(new Range(0, cols.size() - 1, 1));
ranges.add(new Range(cols.size()));
r = ArrayMath.take((Array) this.data, ranges);
} else {
r = new ArrayList<>();
@ -1567,9 +1614,8 @@ public class DataFrame implements Iterable {
* @param rowRange Row range
* @param colRange Column range
* @return Selected data frame or series
* @throws org.meteoinfo.ndarray.InvalidRangeException
*/
public Object select(List<Integer> rowRange, Range colRange) throws InvalidRangeException {
public Object select(List<Integer> rowRange, Range colRange) {
ColumnIndex cols = new ColumnIndex();
for (int i = colRange.first(); i <= colRange.last(); i += colRange.stride()) {
cols.add(this.columns.get(i));
@ -1579,7 +1625,7 @@ public class DataFrame implements Iterable {
if (this.array2D) {
List ranges = new ArrayList<>();
ranges.add(rowRange);
ranges.add(new Range(colRange.first(), colRange.last(), colRange.stride()));
ranges.add(new Range(colRange));
r = ArrayMath.take((Array) this.data, ranges);
} else {
r = new ArrayList<>();
@ -1978,6 +2024,73 @@ public class DataFrame implements Iterable {
}
}
/**
 * Extract a DataFrame made of the given row and column positions.
 *
 * <p>For 2-D array storage the values are copied element by element into a
 * new {@code [rows, columns]} array; a negative row position is filled with
 * the conversion of {@code null} for the array data type (using the format
 * of the first column). For per-column storage each selected column is
 * copied into a new 1-D array.</p>
 *
 * @param rowRange Row range
 * @param colRange Column range
 * @return Extracted DataFrame
 */
public DataFrame extract(List<Integer> rowRange, List<Integer> colRange) {
    final ColumnIndex pickedCols = new ColumnIndex();
    for (int c : colRange) {
        pickedCols.add(this.columns.get(c));
    }

    Object picked;
    if (this.array2D) {
        final Array source = (Array) data;
        final int sourceWidth = source.getShape()[1];
        final int rowCount = rowRange.size();
        final int colCount = colRange.size();
        final DataType dtype = source.getDataType();
        final Array target = Array.factory(dtype, new int[]{rowCount, colCount});
        final String format = this.columns.get(0).getFormat();

        int outCol = 0;
        for (int srcCol : colRange) {
            // Flat offset of (row 0, outCol); advancing by colCount moves down one row
            int flat = outCol;
            for (int srcRow : rowRange) {
                if (srcRow >= 0) {
                    target.setObject(flat, source.getObject(srcRow * sourceWidth + srcCol));
                } else {
                    target.setObject(flat, DataTypeUtil.convertTo(null, dtype, format));
                }
                flat += colCount;
            }
            outCol++;
        }
        picked = target;
    } else {
        final ArrayList<Array> columnArrays = new ArrayList<>();
        final int rowCount = rowRange.size();
        for (int srcCol : colRange) {
            final Array target = Array.factory(this.columns.get(srcCol).getDataType(), new int[]{rowCount});
            final Array source = ((List<Array>) this.data).get(srcCol);
            int out = 0;
            for (int srcRow : rowRange) {
                target.setObject(out++, source.getObject(srcRow));
            }
            columnArrays.add(target);
        }
        // A single selected column is handed on as a bare array
        if (pickedCols.size() == 1) {
            picked = columnArrays.get(0);
        } else {
            picked = columnArrays;
        }
    }

    if (picked == null) {
        return null;
    }
    final Index rIndex = this.index.subIndex(rowRange);
    if (picked instanceof Array) {
        return new DataFrame((Array) picked, rIndex, pickedCols);
    }
    return new DataFrame((ArrayList) picked, rIndex, pickedCols);
}
/**
* Transpose
*
@ -2794,7 +2907,7 @@ public class DataFrame implements Iterable {
* @return The grouping
*/
public DataFrameGroupBy groupBy(final Object... columns) {
Integer[] icols = this.columns.indices(columns);
int[] icols = this.columns.indices(columns);
return groupBy(icols);
}
@ -2805,7 +2918,7 @@ public class DataFrame implements Iterable {
* @return The grouping
*/
public DataFrameGroupBy groupBy(final List<Object> columns) {
Integer[] icols = this.columns.indices(columns);
int[] icols = this.columns.indices(columns);
return groupBy(icols);
}

View File

@ -247,7 +247,7 @@ public class Index<V> implements Iterable<V>{
* @param names Names
* @return Indices
*/
public Integer[] indices(final Object[] names) {
public int[] indices(final Object[] names) {
return indices(Arrays.asList(names));
}
@ -256,9 +256,9 @@ public class Index<V> implements Iterable<V>{
* @param names Names
* @return Indices
*/
public Integer[] indices(final List<Object> names) {
public int[] indices(final List<Object> names) {
final int size = names.size();
final Integer[] indices = new Integer[size];
final int[] indices = new int[size];
for (int i = 0; i < size; i++) {
indices[i] = indexOf(names.get(i));
}

View File

@ -1,32 +1,36 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<MeteoInfo File="milconfig.xml" Type="configurefile">
<Path OpenPath="D:\Working\MIScript\Jython\mis\satellite\smap">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\interpolate"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\meteo"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\meteo\calc"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl"/>
<Path OpenPath="D:\Working\MIScript\Jython\mis\array">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\LaSW"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\volume"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\meteo\wrf"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\chart"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\chart\subplot"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\hdf"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\satellite"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\satellite\smap"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\dataframe\series"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\fitting"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\dataframe"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\stats"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\array"/>
</Path>
<File>
<OpenedFiles>
<OpenedFile File="D:\Working\MIScript\Jython\mis\LaSW\typhoon_map_volume.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\meteo\wrf\wrf_eta2height.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\satellite\smap\pro_SMAP_L4_with_MI.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\dataframe\drop_1.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\common_math\stats\pearsonr.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\common_math\stats\spearmanr.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\array\delnan_1.py"/>
</OpenedFiles>
<RecentFiles>
<RecentFile File="D:\Working\MIScript\Jython\mis\LaSW\typhoon_map_volume.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\meteo\wrf\wrf_eta2height.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\satellite\smap\pro_SMAP_L4_with_MI.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\dataframe\drop_1.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\common_math\stats\pearsonr.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\common_math\stats\spearmanr.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\array\delnan_1.py"/>
</RecentFiles>
</File>
<Font>
@ -34,5 +38,5 @@
</Font>
<LookFeel DockWindowDecorated="true" LafDecorated="true" Name="FlatDarkLaf"/>
<Figure DoubleBuffering="true"/>
<Startup MainFormLocation="-7,0" MainFormSize="1435,830"/>
<Startup MainFormLocation="-7,0" MainFormSize="1397,803"/>
</MeteoInfo>

View File

@ -678,15 +678,39 @@ class DataFrame(object):
value = value._array
self._dataframe.addColumn(loc, column, value)
def drop(self, columns=None):
'''
def drop(self, labels=None, axis=0, index=None, columns=None):
"""
Drop specified labels from rows or columns.
:param columns: (*list like*) Column labels.
'''
if isinstance(columns, basestring):
:param labels: (*single label or list-like*) Index or column labels to drop.
:param axis: (*0 or index, 1 or columns*) Whether to drop labels from the index (0 or index)
or columns (1 or columns).
:param index: (*single label or list-like*) Alternative to specifying axis (labels, axis=0 is
equivalent to index=labels).
:param columns: (*single label or list-like*) Alternative to specifying axis (labels, axis=1 is
equivalent to columns=labels).
:return: (*DataFrame*) Dropped DataFrame.
"""
drop_index = index is not None
drop_col = columns is not None
if index is None and columns is None:
if labels is None:
print('At least one argument needed: labels, index, columns!')
raise IndexError
if axis == 0:
index = labels
else:
columns = labels
if not index is None and not isinstance(index, (list, tuple)):
index = [index]
if not columns is None and not isinstance(columns, (list, tuple)):
columns = [columns]
r = self._dataframe.drop(columns)
r = self._dataframe.drop(index, columns)
return DataFrame(dataframe=r)
def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):

View File

@ -4218,7 +4218,7 @@ public class ArrayMath {
* Remove NaN values in arrays
*
* @param a The arrays
* @return The array withou NaN values
* @return The array without NaN values
*/
public static Array[] removeNaN(Array... a) {
if (a.length == 1) {
@ -5524,6 +5524,51 @@ public class ArrayMath {
return r;
}
/**
 * Range list to section arrays.
 *
 * <p>The results are written into the arrays supplied by the caller, which
 * must therefore be allocated with at least {@code ranges.size()} elements.
 * (Re-allocating the parameters inside this method would be invisible to
 * the caller, because Java passes array references by value.)</p>
 *
 * @param ranges The Range list
 * @param origin Origin array - output, filled with each range's first value
 * @param size Size array - output, filled with {@code last - first + 1}
 * @param stride Stride array - output, filled with each range's stride
 * @throws IllegalArgumentException if any output array is shorter than the range list
 */
public static void rangesToSection(List<Range> ranges, int[] origin, int[] size, int[] stride) {
    final int n = ranges.size();
    if (origin.length < n || size.length < n || stride.length < n) {
        throw new IllegalArgumentException(
                "origin, size and stride must each hold at least " + n + " elements");
    }
    for (int i = 0; i < n; i++) {
        final Range range = ranges.get(i);
        origin[i] = range.first();
        // NOTE(review): this equals the element count only when the stride
        // is 1 - confirm what callers expect for strided ranges.
        size[i] = range.last() - range.first() + 1;
        stride[i] = range.stride();
    }
}
/**
 * Section arrays to range list.
 *
 * <p>Entry {@code i} becomes the range
 * {@code [origin[i], origin[i] + stride[i] * size[i] - 1]} with step
 * {@code stride[i]}. A null stride array means a stride of 1 everywhere.
 * An entry whose range is rejected with {@code InvalidRangeException} has
 * its stack trace printed and is left out of the result.</p>
 *
 * @param origin Origin array
 * @param size Size array
 * @param stride Stride array, may be null
 * @return Range list
 */
public static List<Range> sectionToRanges(int[] origin, int[] size, int[] stride) {
    final int dims = origin.length;
    final List<Range> result = new ArrayList<>(dims);
    for (int d = 0; d < dims; d++) {
        // Missing stride array defaults every axis to a step of 1
        final int step = (stride == null) ? 1 : stride[d];
        try {
            final int first = origin[d];
            result.add(new Range(first, first + step * size[d] - 1, step));
        } catch (InvalidRangeException ex) {
            ex.printStackTrace();
        }
    }
    return result;
}
/**
* Section array
*