add replace function in DataFrame and Series classes

This commit is contained in:
wyq 2022-03-16 12:57:59 +08:00
parent d5933827c4
commit 92a8f19558
9 changed files with 220 additions and 6 deletions

View File

@ -186,6 +186,35 @@ public class MIMath {
}
}
/**
 * Tell whether an object is a boxed floating-point NaN.
 * <p>
 * Only {@code Double} and {@code Float} instances are inspected; any other
 * object, including {@code null}, yields {@code false}.
 *
 * @param o The object to test
 * @return {@code true} if {@code o} is a Double or Float holding NaN
 */
public static boolean isNaN(Object o) {
    if (o instanceof Float) {
        return ((Float) o).isNaN();
    }
    // instanceof is false for null, so null falls through to false here.
    return o instanceof Double && ((Double) o).isNaN();
}
/**
 * Tell whether an object is {@code null} or NaN.
 * <p>
 * The NaN part of the check is delegated to {@link #isNaN(Object)}, so it is
 * only meaningful for Double and Float objects.
 *
 * @param o The object to test
 * @return {@code true} if {@code o} is null or is reported as NaN by
 * {@link #isNaN(Object)}
 */
public static boolean isNullOrNaN(Object o) {
    return o == null || isNaN(o);
}
/**
* Get extent from PointF array
*

View File

@ -5,6 +5,7 @@
*/
package org.meteoinfo.dataframe;
import org.meteoinfo.common.MIMath;
import org.meteoinfo.common.util.GlobalUtil;
import org.meteoinfo.common.util.JDateUtil;
import org.meteoinfo.common.util.TypeUtils;
@ -1140,7 +1141,11 @@ public class DataFrame implements Iterable {
colnames.removeAll(todrop);
if (this.array2D) {
return null;
List ranges = new ArrayList<>();
ranges.add(new Range(this.length()));
ranges.add(columns.indexOfName(colnames));
Array r = ArrayMath.take((Array) this.data, ranges);
return new DataFrame(r, index.getValues(), colnames);
} else {
final List<Array> keep = new ArrayList<>(colnames.size());
for (final String col : colnames) {
@ -1151,6 +1156,78 @@ public class DataFrame implements Iterable {
}
}
/**
 * Drop the rows or columns that hold at least one missing value.
 * <p>
 * Row mode keeps every row in which no cell is flagged by
 * {@code MIMath.isNullOrNaN} and selects those rows. Column mode collects
 * the position of every column whose data array is flagged by
 * {@code ArrayMath.containsNaN} and hands those positions to {@code drop}.
 *
 * @param row {@code true} to examine and remove rows, {@code false} to
 * examine and remove columns
 * @return The resulting DataFrame
 * @throws InvalidRangeException Declared by this method; presumably raised
 * by the underlying data access, selection or drop - confirm against those
 * methods
 */
public DataFrame dropNAAny(boolean row) throws InvalidRangeException {
    final int colCount = this.size();
    final int rowCount = this.length();

    if (!row) {
        // Column mode: gather the positions of the columns to remove.
        List<Integer> naColumns = new ArrayList<>();
        for (int c = 0; c < colCount; c++) {
            Array columnData = this.getColumnData(c);
            if (ArrayMath.containsNaN(columnData)) {
                naColumns.add(c);
            }
        }
        Integer[] dropped = naColumns.toArray(new Integer[naColumns.size()]);
        return drop(dropped);
    }

    // Row mode: gather the positions of the rows to keep.
    List<Integer> keptRows = new ArrayList<>();
    for (int r = 0; r < rowCount; r++) {
        int c = 0;
        // Advance until the first null/NaN cell or the end of the row.
        while (c < colCount && !MIMath.isNullOrNaN(this.getValue(r, c))) {
            c++;
        }
        if (c == colCount) {
            keptRows.add(r);
        }
    }
    return (DataFrame) this.select(keptRows);
}
/**
 * Remove the rows or columns in which all values are missing.
 * <p>
 * Row mode keeps every row that has at least one cell not flagged by
 * {@code MIMath.isNullOrNaN} and selects those rows. Column mode collects
 * the position of every column for which {@code ArrayMath.allNaN} returns
 * true and hands those positions to {@code drop}.
 * <p>
 * NOTE(review): the row test treats null as missing, while the column test
 * relies on {@code ArrayMath.allNaN}; whether that also covers null entries
 * is not established here - confirm.
 *
 * @param row {@code true} to examine and remove rows, {@code false} to
 * examine and remove columns
 * @return The resulting DataFrame
 * @throws InvalidRangeException Declared by this method; presumably raised
 * by the underlying data access, selection or drop - confirm against those
 * methods
 */
public DataFrame dropNAAll(boolean row) throws InvalidRangeException {
    int size = this.size();
    int length = this.length();
    if (row) {
        // Positions of the rows to keep.
        List<Integer> rowRange = new ArrayList<>();
        for (int i = 0; i < length; i++) {
            boolean allNA = true;
            for (int j = 0; j < size; j++) {
                if (!MIMath.isNullOrNaN(this.getValue(i, j))) {
                    // One valid cell is enough to keep the row.
                    allNA = false;
                    break;
                }
            }
            if (!allNA) {
                rowRange.add(i);
            }
        }
        return (DataFrame) this.select(rowRange);
    } else {
        // Positions of the columns to remove.
        List<Integer> colRange = new ArrayList<>();
        for (int i = 0; i < size; i++) {
            Array a = this.getColumnData(i);
            if (ArrayMath.allNaN(a)) {
                colRange.add(i);
            }
        }
        return drop(colRange.toArray(new Integer[colRange.size()]));
    }
}
/**
* Create a new data frame containing only the specified columns.
*
@ -1979,6 +2056,29 @@ public class DataFrame implements Iterable {
return toString(rn - n, rn);
}
/**
 * Build a new DataFrame in which one value has been substituted by another.
 * <p>
 * The data is copied before {@code ArrayMath.replaceValue} is applied to
 * the copy, and the index and columns passed to the new DataFrame are
 * clones, so the replacement is not performed on this DataFrame's own data
 * arrays.
 *
 * @param toReplace The value to be replaced
 * @param value The replacing value
 * @return A new DataFrame holding the replaced data
 */
public DataFrame replace(Object toReplace, Object value) {
    if (!this.array2D) {
        // Data is held as a list of arrays: copy and patch each one.
        List<Array> replaced = new ArrayList<>();
        for (Array source : (List<Array>) this.data) {
            Array duplicate = source.copy();
            ArrayMath.replaceValue(duplicate, toReplace, value);
            replaced.add(duplicate);
        }
        return new DataFrame(replaced, (Index) this.index.clone(), (ColumnIndex) this.columns.clone());
    }

    // Data is held as a single array.
    Array whole = ((Array) this.data).copy();
    ArrayMath.replaceValue(whole, toReplace, value);
    return new DataFrame(whole, (Index) this.index.clone(), (ColumnIndex) this.columns.clone());
}
@Override
public String toString() {
return head(100);

View File

@ -357,6 +357,19 @@ public class Series implements Iterable {
return this.index.size();
}
/**
 * Build a new Series in which one value has been substituted by another.
 * <p>
 * The data array is copied before {@code ArrayMath.replaceValue} is applied
 * to the copy; the new Series receives a clone of the index and the same
 * name.
 *
 * @param toReplace The value to be replaced
 * @param value The replacing value
 * @return A new Series holding the replaced data
 */
public Series replace(Object toReplace, Object value) {
    final Array replaced = this.data.copy();
    ArrayMath.replaceValue(replaced, toReplace, value);
    final Index indexCopy = (Index) this.index.clone();
    return new Series(replaced, indexCopy, this.name);
}
/**
* Group the series rows using the specified key function.
*

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<MeteoInfo File="milconfig.xml" Type="configurefile">
<Path OpenPath="D:\Working\MIScript\Jython\mis\map\webmap">
<Path OpenPath="D:\Working\MIScript\Jython\mis\dataframe">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\map"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\map\projection"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\city"/>
@ -14,19 +14,21 @@
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\satellite"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\satellite\FY"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\dataframe"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\map\webmap"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\dataframe"/>
</Path>
<File>
<OpenedFiles>
<OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\plot\traj_3d.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\3d_earth\CALIPSO_L1_3d_axis.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\map\webmap\plotweb_2.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\dataframe\dropna_1.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\dataframe\series\replace_1.py"/>
</OpenedFiles>
<RecentFiles>
<RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\plot\traj_3d.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\3d_earth\CALIPSO_L1_3d_axis.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\map\webmap\plotweb_2.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\dataframe\dropna_1.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\dataframe\series\replace_1.py"/>
</RecentFiles>
</File>
<Font>

View File

@ -688,6 +688,40 @@ class DataFrame(object):
columns = [columns]
r = self._dataframe.drop(columns)
return DataFrame(dataframe=r)
def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):
    """
    Remove missing values.

    :param axis: (*int or str*) {0 or 'index', 1 or 'columns'}, default 0. Rows are removed when
        axis is 0 or 'index'; any other value removes columns.
    :param how: (*str*) {'any', 'all'}, default 'any'. With 'any', a row or column is removed when
        it has at least one NA value. Any other value is handled as 'all': a row or column is
        removed only when all of its values are NA.
    :param thresh: (*int*) Optional. Accepted for signature compatibility; not used by this
        implementation.
    :param subset: (*list*) Optional. Accepted for signature compatibility; not used by this
        implementation.
    :param inplace: (*bool*) Default False. Accepted for signature compatibility; not used by this
        implementation - a new DataFrame is always returned.

    :return: (*DataFrame*) New DataFrame with NA entries dropped from it.
    """
    by_row = (axis == 0 or axis == 'index')
    # Named drop_any so that the builtin any() is not shadowed.
    drop_any = (how == 'any')
    if drop_any:
        r = self._dataframe.dropNAAny(by_row)
    else:
        r = self._dataframe.dropNAAll(by_row)
    return DataFrame(dataframe=r)
def replace(self, to_replace, value):
    """
    Replace every occurrence of a value with another value.

    :param to_replace: (*object*) The value to be replaced.
    :param value: (*object*) The replacing value.

    :return: (*DataFrame*) New data frame with the value replaced.
    """
    # The wrapped Java data frame does the replacement and returns the result.
    return DataFrame(dataframe=self._dataframe.replace(to_replace, value))
def append(self, other):
'''

View File

@ -343,6 +343,17 @@ class Series(object):
:returns: MI Java Array object
'''
return self.values.asarray()
def replace(self, to_replace, value):
    """
    Replace every occurrence of a value with another value.

    :param to_replace: (*object*) The value to be replaced.
    :param value: (*object*) The replacing value.

    :return: (*Series*) New series with the value replaced.
    """
    # The wrapped Java series does the replacement and returns the result.
    return Series(series=self._series.replace(to_replace, value))
def mean(self):
'''

View File

@ -4152,11 +4152,36 @@ public class ArrayMath {
return false;
}
/**
 * Check whether every value of an array is NaN.
 * <p>
 * Values are read as doubles and tested with {@code Double.isNaN}; the
 * first non-NaN value ends the scan. An array that yields no values returns
 * {@code true}.
 *
 * @param a Input array
 * @return {@code true} if no non-NaN value was found, otherwise
 * {@code false}
 */
public static boolean allNaN(Array a) {
    if (!a.getIndexPrivate().isFastIterator()) {
        // No fast iterator available: walk the array with its index iterator.
        IndexIterator cursor = a.getIndexIterator();
        while (cursor.hasNext()) {
            double current = cursor.getDoubleNext();
            if (!Double.isNaN(current)) {
                return false;
            }
        }
        return true;
    }

    // Fast iterator available: read the values by position.
    for (int pos = 0; pos < a.getSize(); pos++) {
        double current = a.getDouble(pos);
        if (!Double.isNaN(current)) {
            return false;
        }
    }
    return true;
}
/**
* Remove NaN values in an array
*
* @param a The array
* @return The array withou NaN values
* @return The array without NaN values
*/
public static Array removeNaN(Array a) {
List d = new ArrayList<>();