Update the t-test functions to support n-dimensional arrays via an axis argument

This commit is contained in:
wyq 2025-10-03 19:58:32 +08:00
parent 07e26a4fc3
commit cb72da8da3
4 changed files with 337 additions and 57 deletions

View File

@ -1,11 +1,9 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<MeteoInfo File="milconfig.xml" Type="configurefile">
<Path OpenPath="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\map\maskout"/>
<Path OpenPath="D:\Working\MIScript\Jython\mis\common_math\stats">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\map"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\map\topology"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\ascii"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\linalg"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\funny"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types"/>
@ -16,15 +14,17 @@
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\io"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\io\matlab"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\stats"/>
</Path>
<File>
<OpenedFiles>
<OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshape_ice_cream_cone.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshiape_bouquet.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\common_math\stats\ttest_ind_precipitation_anomaly_1.py"/>
</OpenedFiles>
<RecentFiles>
<RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshape_ice_cream_cone.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshiape_bouquet.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\common_math\stats\ttest_ind_precipitation_anomaly_1.py"/>
</RecentFiles>
</File>
<Font>

View File

@ -319,10 +319,9 @@ def pearsonr(x, y, axis=None):
:returns: Pearsons correlation coefficient and 2-tailed p-value.
"""
if isinstance(x, list):
x = np.array(x)
if isinstance(y, list):
y = np.array(y)
x = np.asanyarray(x)
y = np.asanyarray(y)
if axis is None:
r = StatsUtil.pearsonr(x.asarray(), y.asarray())
return r[0], r[1]
@ -514,63 +513,108 @@ def percentile(a, q, axis=None):
return r
def ttest_1samp(a, popmean, axis=0):
    """
    Calculate the T-test for the mean of ONE group of scores.

    This is a two-sided test for the null hypothesis that the expected value (mean) of
    a sample of independent observations a is equal to the given population mean, popmean.

    Parameters
    ----------
    a : array_like
        Sample observation.
    popmean : float or array_like
        Expected value in null hypothesis. An array is only used when the test
        is computed along an axis of a multi-dimensional `a`; it is then read
        element by element, one value per output element.
    axis : int or None, optional
        Axis along which to compute test (the first, by default). Negative
        values count from the last axis. If None, compute over the whole
        array `a`.

    Returns
    -------
    result : t-statistic and p-value
    """
    a = np.asanyarray(a)
    if a.ndim == 1 or axis is None:
        # Single test over all elements; popmean has to be a scalar here.
        r = StatsUtil.tTestOneSample(a.asarray(), popmean)
        return r[0], r[1]

    # The Java side takes a plain int axis, so resolve negative values here.
    if axis < 0:
        axis += a.ndim
    if isinstance(popmean, (list, tuple)):
        popmean = np.array(popmean)
    if isinstance(popmean, np.NDArray):
        popmean = popmean._array
    r = StatsUtil.tTestOneSample(a._array, popmean, axis)
    return np.array(r[0]), np.array(r[1])
def ttest_rel(a, b, axis=0):
    """
    Calculates the T-test on TWO RELATED samples of scores, a and b.

    This is a two-sided test for the null hypothesis that 2 related or repeated samples
    have identical average (expected) values.

    Parameters
    ----------
    a, b : array_like
        The arrays must have the same shape.
    axis : int or None, optional
        Axis along which to compute test (the first, by default). Negative
        values count from the last axis. If None, compute over the whole
        arrays, `a`, and `b`.

    Returns
    -------
    result : t-statistic and p-value
    """
    a = np.asanyarray(a)
    b = np.asanyarray(b)
    if a.ndim == 1 or axis is None:
        # Single paired test over all elements of both arrays.
        r = StatsUtil.pairedTTest(a.asarray(), b.asarray())
        return r[0], r[1]

    # The Java side takes a plain int axis, so resolve negative values here.
    if axis < 0:
        axis += a.ndim
    r = StatsUtil.pairedTTest(a._array, b._array, axis)
    return np.array(r[0]), np.array(r[1])
def ttest_ind(a, b, axis=0, equal_var=True):
    """
    Calculate the T-test for the means of *two independent* samples of scores.

    This is a test for the null hypothesis that 2 independent samples
    have identical average (expected) values. This test assumes that the
    populations have identical variances by default.

    Parameters
    ----------
    a, b : array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int or None, optional
        Axis along which to compute test. Negative values count from the
        last axis. If None, compute over the whole arrays, `a`, and `b`.
    equal_var : bool, optional
        If True (default), perform a standard independent 2 sample test
        that assumes equal population variances [1]_.
        If False, perform Welch's t-test, which does not assume equal
        population variance [2]_.

    Returns
    -------
    result : t-statistic and p-value

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/T-test#Independent_two-sample_t-test
    .. [2] https://en.wikipedia.org/wiki/Welch%27s_t-test
    """
    a = np.asanyarray(a)
    b = np.asanyarray(b)
    if a.ndim == 1 or axis is None:
        # Single test over all elements of both arrays.
        r = StatsUtil.tTest(a._array, b._array, equal_var)
        return r[0], r[1]

    # The Java side takes a plain int axis, so resolve negative values here.
    if axis < 0:
        axis += a.ndim
    r = StatsUtil.tTest(a._array, b._array, axis, equal_var)
    return np.array(r[0]), np.array(r[1])
def chisquare(f_obs, f_exp=None):

View File

@ -15,6 +15,7 @@ import org.apache.commons.math4.legacy.stat.correlation.KendallsCorrelation;
import org.apache.commons.math4.legacy.stat.correlation.PearsonsCorrelation;
import org.apache.commons.math4.legacy.stat.correlation.SpearmansCorrelation;
import org.apache.commons.math4.legacy.stat.inference.InferenceTestUtils;
import org.apache.commons.math4.legacy.stat.inference.TTest;
import org.apache.commons.math4.legacy.stat.regression.OLSMultipleLinearRegression;
import org.meteoinfo.ndarray.*;
import org.meteoinfo.ndarray.math.ArrayMath;
@ -453,47 +454,282 @@ public class StatsUtil {
* @param mu Expected value in null hypothesis
* @return t_statistic and p_value
*/
public static double[] tTest(Array a, double mu){
double[] ad = (double[]) ArrayUtil.copyToNDJavaArray_Double(a);
public static double[] tTestOneSample(Array a, double mu){
double[] ad = (double[]) a.get1DJavaArray(double.class);
double s = InferenceTestUtils.t(mu, ad);
double p = InferenceTestUtils.tTest(mu, ad);
return new double[]{s, p};
}
/**
 * One sample t test computed along one axis of an n-dimensional array.
 * Every 1-D slice of {@code a} along {@code axis} is tested against the
 * same expected value.
 *
 * @param a Sample a
 * @param mu Expected value in null hypothesis
 * @param axis Axis along which the test is computed; negative values count
 *             from the last dimension
 * @return t_statistic and p_value arrays, shaped like {@code a} with
 *         {@code axis} removed
 * @throws InvalidRangeException If a slice range cannot be built
 * @throws IllegalArgumentException If {@code axis} is out of bounds
 */
public static Array[] tTestOneSample(Array a, double mu, int axis) throws InvalidRangeException {
    int[] aShape = a.getShape();
    int ndim = aShape.length;
    // Accept negative axes (-1 is the last dimension) and reject anything else out of range
    if (axis < 0) {
        axis += ndim;
    }
    if (axis < 0 || axis >= ndim) {
        throw new IllegalArgumentException("axis is out of bounds for array of dimension " + ndim);
    }

    // Result shape is the input shape with the test axis dropped
    int[] shape = new int[ndim - 1];
    for (int i = 0, k = 0; i < ndim; i++) {
        if (i != axis) {
            shape[k++] = aShape[i];
        }
    }

    Array tStatistic = Array.factory(DataType.DOUBLE, shape);
    Array pValue = Array.factory(DataType.DOUBLE, shape);
    Index indexr = tStatistic.getIndex();
    for (int i = 0; i < tStatistic.getSize(); i++) {
        int[] current = indexr.getCurrentCounter();
        // Full range along the test axis, a single position on every other axis
        List<Range> aRanges = new ArrayList<>();
        for (int j = 0, k = 0; j < ndim; j++) {
            if (j == axis) {
                aRanges.add(new Range(0, aShape[j] - 1, 1));
            } else {
                aRanges.add(new Range(current[k], current[k], 1));
                k++;
            }
        }
        Array xx = ArrayMath.section(a, aRanges);
        double[] rp = tTestOneSample(xx, mu);
        tStatistic.setDouble(i, rp[0]);
        pValue.setDouble(i, rp[1]);
        indexr.incr();
    }

    return new Array[]{tStatistic, pValue};
}
/**
 * One sample t test computed along one axis of an n-dimensional array,
 * with a separate expected value for every output element.
 *
 * @param a Sample a
 * @param mu Expected values in null hypothesis; read by flat index, one
 *           value per output element
 * @param axis Axis along which the test is computed; negative values count
 *             from the last dimension
 * @return t_statistic and p_value arrays, shaped like {@code a} with
 *         {@code axis} removed
 * @throws InvalidRangeException If a slice range cannot be built
 * @throws IllegalArgumentException If {@code axis} is out of bounds or
 *         {@code mu} does not hold one value per output element
 */
public static Array[] tTestOneSample(Array a, Array mu, int axis) throws InvalidRangeException {
    int[] aShape = a.getShape();
    int ndim = aShape.length;
    // Accept negative axes (-1 is the last dimension) and reject anything else out of range
    if (axis < 0) {
        axis += ndim;
    }
    if (axis < 0 || axis >= ndim) {
        throw new IllegalArgumentException("axis is out of bounds for array of dimension " + ndim);
    }

    // Result shape is the input shape with the test axis dropped
    int[] shape = new int[ndim - 1];
    for (int i = 0, k = 0; i < ndim; i++) {
        if (i != axis) {
            shape[k++] = aShape[i];
        }
    }

    Array tStatistic = Array.factory(DataType.DOUBLE, shape);
    Array pValue = Array.factory(DataType.DOUBLE, shape);
    // mu is addressed with the same flat index as the result, so the sizes have to agree
    if (mu.getSize() != tStatistic.getSize()) {
        throw new IllegalArgumentException("mu must have " + tStatistic.getSize()
                + " elements, but has " + mu.getSize());
    }
    Array expected = mu.copyIfView();

    Index indexr = tStatistic.getIndex();
    for (int i = 0; i < tStatistic.getSize(); i++) {
        int[] current = indexr.getCurrentCounter();
        // Full range along the test axis, a single position on every other axis
        List<Range> aRanges = new ArrayList<>();
        for (int j = 0, k = 0; j < ndim; j++) {
            if (j == axis) {
                aRanges.add(new Range(0, aShape[j] - 1, 1));
            } else {
                aRanges.add(new Range(current[k], current[k], 1));
                k++;
            }
        }
        Array xx = ArrayMath.section(a, aRanges);
        double[] rp = tTestOneSample(xx, expected.getDouble(i));
        tStatistic.setDouble(i, rp[0]);
        pValue.setDouble(i, rp[1]);
        indexr.incr();
    }

    return new Array[]{tStatistic, pValue};
}
/**
 * Unpaired, two-sided, two-sample t-test.
 * Delegates to {@code InferenceTestUtils.t} and
 * {@code InferenceTestUtils.tTest}.
 *
 * @param a Sample a
 * @param b Sample b
 * @return t_statistic and p_value
 */
public static double[] tTest(Array a, Array b) {
    // Commons Math works on primitive arrays, so copy both samples into 1-D double[] first
    double[] ad = (double[]) a.get1DJavaArray(double.class);
    double[] bd = (double[]) b.get1DJavaArray(double.class);
    double s = InferenceTestUtils.t(ad, bd);
    double p = InferenceTestUtils.tTest(ad, bd);
    return new double[]{s, p};
}
/**
 * Unpaired, two-sided, two-sample t-test with a selectable variance
 * assumption.
 *
 * @param a Sample a
 * @param b Sample b
 * @param equalVariance True to use the homoscedastic (equal variance)
 *                      test, false to use Welch's t-test
 * @return t_statistic and p_value
 */
public static double[] tTest(Array a, Array b, boolean equalVariance) {
    final double[] first = (double[]) a.get1DJavaArray(double.class);
    final double[] second = (double[]) b.get1DJavaArray(double.class);
    final TTest test = new TTest();

    if (!equalVariance) {
        // Welch's t-test
        return new double[]{test.t(first, second), test.tTest(first, second)};
    }

    return new double[]{test.homoscedasticT(first, second), test.homoscedasticTTest(first, second)};
}
/**
 * Unpaired, two-sided, two-sample t-test computed along one axis of two
 * n-dimensional arrays. Matching 1-D slices of {@code a} and {@code b}
 * along {@code axis} are tested against each other.
 *
 * @param a Sample a
 * @param b Sample b; must have the same shape as {@code a} except along
 *          {@code axis}
 * @param axis Axis along which the test is computed; negative values count
 *             from the last dimension
 * @param equalVariance Equal variance or not
 * @return t_statistic and p_value arrays, shaped like {@code a} with
 *         {@code axis} removed
 * @throws InvalidRangeException If a slice range cannot be built
 * @throws IllegalArgumentException If {@code axis} is out of bounds or the
 *         shapes of {@code a} and {@code b} are incompatible
 */
public static Array[] tTest(Array a, Array b, int axis, boolean equalVariance) throws InvalidRangeException {
    int[] aShape = a.getShape();
    int[] bShape = b.getShape();
    int ndim = aShape.length;
    // Accept negative axes (-1 is the last dimension) and reject anything else out of range
    if (axis < 0) {
        axis += ndim;
    }
    if (axis < 0 || axis >= ndim) {
        throw new IllegalArgumentException("axis is out of bounds for array of dimension " + ndim);
    }
    // Slices are paired up by position, so every non-test dimension has to agree
    if (bShape.length != ndim) {
        throw new IllegalArgumentException("a and b must have the same number of dimensions: "
                + ndim + " != " + bShape.length);
    }
    for (int i = 0; i < ndim; i++) {
        if (i != axis && aShape[i] != bShape[i]) {
            throw new IllegalArgumentException("a and b must have the same shape except along axis " + axis);
        }
    }

    // Result shape is the input shape with the test axis dropped
    int[] shape = new int[ndim - 1];
    for (int i = 0, k = 0; i < ndim; i++) {
        if (i != axis) {
            shape[k++] = aShape[i];
        }
    }

    Array tStatistic = Array.factory(DataType.DOUBLE, shape);
    Array pValue = Array.factory(DataType.DOUBLE, shape);
    Index indexr = tStatistic.getIndex();
    for (int i = 0; i < tStatistic.getSize(); i++) {
        int[] current = indexr.getCurrentCounter();
        // Full range along the test axis, a single position on every other axis
        List<Range> aRanges = new ArrayList<>();
        List<Range> bRanges = new ArrayList<>();
        for (int j = 0, k = 0; j < ndim; j++) {
            if (j == axis) {
                aRanges.add(new Range(0, aShape[j] - 1, 1));
                bRanges.add(new Range(0, bShape[j] - 1, 1));
            } else {
                aRanges.add(new Range(current[k], current[k], 1));
                bRanges.add(new Range(current[k], current[k], 1));
                k++;
            }
        }
        Array xx = ArrayMath.section(a, aRanges);
        Array yy = ArrayMath.section(b, bRanges);
        double[] rp = tTest(xx, yy, equalVariance);
        tStatistic.setDouble(i, rp[0]);
        pValue.setDouble(i, rp[1]);
        indexr.incr();
    }

    return new Array[]{tStatistic, pValue};
}
/**
 * Paired test evaluating the null hypothesis that the mean difference
 * between corresponding (paired) elements of the double[] arrays sample1
 * and sample2 is zero.
 *
 * @param a Sample a
 * @param b Sample b
 * @return t_statistic and p_value
 */
public static double[] pairedTTest(Array a, Array b) {
    // Commons Math works on primitive arrays, so copy both samples into 1-D double[] first
    double[] ad = (double[]) a.get1DJavaArray(double.class);
    double[] bd = (double[]) b.get1DJavaArray(double.class);
    double s = InferenceTestUtils.pairedT(ad, bd);
    double p = InferenceTestUtils.pairedTTest(ad, bd);
    return new double[]{s, p};
}
/**
 * Paired t-test computed along one axis of two n-dimensional arrays.
 * For every pair of matching 1-D slices along {@code axis}, evaluates the
 * null hypothesis that the mean difference between corresponding (paired)
 * elements is zero.
 *
 * @param a Sample a
 * @param b Sample b; must have the same shape as {@code a} on every
 *          dimension other than {@code axis}
 * @param axis Axis along which the test is computed; negative values count
 *             from the last dimension
 * @return t_statistic and p_value arrays, shaped like {@code a} with
 *         {@code axis} removed
 * @throws InvalidRangeException If a slice range cannot be built
 * @throws IllegalArgumentException If {@code axis} is out of bounds or the
 *         shapes of {@code a} and {@code b} are incompatible
 */
public static Array[] pairedTTest(Array a, Array b, int axis) throws InvalidRangeException {
    int[] aShape = a.getShape();
    int[] bShape = b.getShape();
    int ndim = aShape.length;
    // Accept negative axes (-1 is the last dimension) and reject anything else out of range
    if (axis < 0) {
        axis += ndim;
    }
    if (axis < 0 || axis >= ndim) {
        throw new IllegalArgumentException("axis is out of bounds for array of dimension " + ndim);
    }
    // Slices are paired up by position, so every non-test dimension has to agree;
    // the length along the test axis is left to the underlying paired test to check
    if (bShape.length != ndim) {
        throw new IllegalArgumentException("a and b must have the same number of dimensions: "
                + ndim + " != " + bShape.length);
    }
    for (int i = 0; i < ndim; i++) {
        if (i != axis && aShape[i] != bShape[i]) {
            throw new IllegalArgumentException("a and b must have the same shape except along axis " + axis);
        }
    }

    // Result shape is the input shape with the test axis dropped
    int[] shape = new int[ndim - 1];
    for (int i = 0, k = 0; i < ndim; i++) {
        if (i != axis) {
            shape[k++] = aShape[i];
        }
    }

    Array tStatistic = Array.factory(DataType.DOUBLE, shape);
    Array pValue = Array.factory(DataType.DOUBLE, shape);
    Index indexr = tStatistic.getIndex();
    for (int i = 0; i < tStatistic.getSize(); i++) {
        int[] current = indexr.getCurrentCounter();
        // Full range along the test axis, a single position on every other axis
        List<Range> aRanges = new ArrayList<>();
        List<Range> bRanges = new ArrayList<>();
        for (int j = 0, k = 0; j < ndim; j++) {
            if (j == axis) {
                aRanges.add(new Range(0, aShape[j] - 1, 1));
                bRanges.add(new Range(0, bShape[j] - 1, 1));
            } else {
                aRanges.add(new Range(current[k], current[k], 1));
                bRanges.add(new Range(current[k], current[k], 1));
                k++;
            }
        }
        Array xx = ArrayMath.section(a, aRanges);
        Array yy = ArrayMath.section(b, bRanges);
        double[] rp = pairedTTest(xx, yy);
        tStatistic.setDouble(i, rp[0]);
        pValue.setDouble(i, rp[1]);
        indexr.incr();
    }

    return new Array[]{tStatistic, pValue};
}
/**
* Chi-square test
@ -503,8 +739,8 @@ public class StatsUtil {
* @return Chi-square_statistic and p_value
*/
public static double[] chiSquareTest(Array e, Array o) {
double[] ed = (double[]) ArrayUtil.copyToNDJavaArray_Double(e);
long[] od = (long[]) ArrayUtil.copyToNDJavaArray_Long(o);
double[] ed = (double[]) e.get1DJavaArray(double.class);
long[] od = (long[]) o.get1DJavaArray(double.class);
double s = InferenceTestUtils.chiSquare(ed, od);
double p = InferenceTestUtils.chiSquareTest(ed, od);