Update the t-test functions to support n-dimensional arrays with an axis argument

2026-01-25 16:26:03 +00:00 · 2025-10-03 19:58:32 +08:00 · 2025-10-03 19:58:32 +08:00 · cb72da8da3
commit cb72da8da3
parent 07e26a4fc3
4 changed files with 337 additions and 57 deletions
--- a/meteoinfo-lab/milconfig.xml
+++ b/meteoinfo-lab/milconfig.xml
@ -1,11 +1,9 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <MeteoInfo File="milconfig.xml" Type="configurefile">
-  <Path OpenPath="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf">
-    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\map\maskout"/>
+  <Path OpenPath="D:\Working\MIScript\Jython\mis\common_math\stats">
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\map"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\map\topology"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\ascii"/>
-    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\linalg"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\funny"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types"/>
@ -16,15 +14,17 @@
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\io"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\io\matlab"/>
    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf"/>
+    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math"/>
+    <RecentFolder Folder="D:\Working\MIScript\Jython\mis\common_math\stats"/>
  </Path>
  <File>
    <OpenedFiles>
      <OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshape_ice_cream_cone.py"/>
-      <OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshiape_bouquet.py"/>
+      <OpenedFile File="D:\Working\MIScript\Jython\mis\common_math\stats\ttest_ind_precipitation_anomaly_1.py"/>
    </OpenedFiles>
    <RecentFiles>
      <RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshape_ice_cream_cone.py"/>
-      <RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\surf\shitshiape_bouquet.py"/>
+      <RecentFile File="D:\Working\MIScript\Jython\mis\common_math\stats\ttest_ind_precipitation_anomaly_1.py"/>
    </RecentFiles>
  </File>
  <Font>
--- a/meteoinfo-lab/pylib/mipylib/numeric/stats/stats$py.class
+++ b/meteoinfo-lab/pylib/mipylib/numeric/stats/stats$py.class
--- a/meteoinfo-lab/pylib/mipylib/numeric/stats/stats.py
+++ b/meteoinfo-lab/pylib/mipylib/numeric/stats/stats.py
@ -319,10 +319,9 @@ def pearsonr(x, y, axis=None):
    
    :returns: Pearson’s correlation coefficient and 2-tailed p-value.
    """
-    if isinstance(x, list):
-        x = np.array(x)
-    if isinstance(y, list):
-        y = np.array(y)
+    x = np.asanyarray(x)
+    y = np.asanyarray(y)
+
    if axis is None:
        r = StatsUtil.pearsonr(x.asarray(), y.asarray())
        return r[0], r[1]
@ -514,63 +513,108 @@ def percentile(a, q, axis=None):
    return r


-def ttest_1samp(a, popmean):
+def ttest_1samp(a, popmean, axis=0):
    """
    Calculate the T-test for the mean of ONE group of scores.

    This is a two-sided test for the null hypothesis that the expected value (mean) of 
    a sample of independent observations a is equal to the given population mean, popmean.
-    
-    :param a: (*array_like*) Sample observation.
-    :param popmean: (*float*) Expected value in null hypothesis.
-    
-    :returns: t-statistic and p-value
+
+    Parameters
+    ----------
+    a : array_like
+        Sample observation.
+    popmean : float or array_like
+        Expected value in null hypothesis.
+
+    Returns
+    -------
+    result : t-statistic and p-value
    """
-    if isinstance(a, list):
-        a = np.array(x)
-    r = StatsUtil.tTest(a.asarray(), popmean)
-    return r[0], r[1]
+    a = np.asanyarray(a)
+    if a.ndim == 1:
+        r = StatsUtil.tTestOneSample(a.asarray(), popmean)
+        return r[0], r[1]
+    else:
+        if isinstance(popmean, (list, tuple)):
+            popmean = np.array(popmean)
+        if isinstance(popmean, np.NDArray):
+            popmean = popmean._array
+
+        r = StatsUtil.tTestOneSample(a._array, popmean, axis)
+        return np.array(r[0]), np.array(r[1])


-def ttest_rel(a, b):
+def ttest_rel(a, b, axis=0):
    """
    Calculates the T-test on TWO RELATED samples of scores, a and b.

    This is a two-sided test for the null hypothesis that 2 related or repeated samples 
    have identical average (expected) values.
    
-    :param a: (*array_like*) Sample data a.
-    :param b: (*array_like*) Sample data b.
-    
-    :returns: t-statistic and p-value
+    Parameters
+    ----------
+    a, b : array_like
+        The arrays must have the same shape, except in the dimension
+        corresponding to `axis` (the first, by default).
+    axis : int or None, optional
+        Axis along which to compute test. If None, compute over the whole
+        arrays, `a`, and `b`.
+
+    Returns
+    -------
+    result : t-statistic and p-value
    """
-    if isinstance(a, list):
-        a = np.array(a)
-    if isinstance(b, list):
-        b = np.array(b)
-    r = StatsUtil.pairedTTest(a.asarray(), b.asarray())
-    return r[0], r[1]
+    a = np.asanyarray(a)
+    b = np.asanyarray(b)
+    if a.ndim == 1:
+        r = StatsUtil.pairedTTest(a.asarray(), b.asarray())
+        return r[0], r[1]
+    else:
+        r = StatsUtil.pairedTTest(a._array, b._array, axis)
+        return np.array(r[0]), np.array(r[1])


-def ttest_ind(a, b):
+def ttest_ind(a, b, axis=0, equal_var=True):
    """
-    Calculates the T-test for the means of TWO INDEPENDENT samples of scores.
+    Calculate the T-test for the means of *two independent* samples of scores.

-    This is a two-sided test for the null hypothesis that 2 independent samples have 
-    identical average (expected) values. This test assumes that the populations have 
-    identical variances.
+    This is a test for the null hypothesis that 2 independent samples
+    have identical average (expected) values. This test assumes that the
+    populations have identical variances by default.
    
-    :param a: (*array_like*) Sample data a.
-    :param b: (*array_like*) Sample data b.
-    
-    :returns: t-statistic and p-value
+    Parameters
+    ----------
+    a, b : array_like
+        The arrays must have the same shape, except in the dimension
+        corresponding to `axis` (the first, by default).
+    axis : int or None, optional
+        Axis along which to compute test. If None, compute over the whole
+        arrays, `a`, and `b`.
+    equal_var : bool, optional
+        If True (default), perform a standard independent 2 sample test
+        that assumes equal population variances [1]_.
+        If False, perform Welch's t-test, which does not assume equal
+        population variance [2]_.
+
+    Returns
+    -------
+    result : t-statistic and p-value
+
+     References
+    ----------
+    .. [1] https://en.wikipedia.org/wiki/T-test#Independent_two-sample_t-test
+
+    .. [2] https://en.wikipedia.org/wiki/Welch%27s_t-test
    """
-    if isinstance(a, list):
-        a = np.array(a)
-    if isinstance(b, list):
-        b = np.array(b)
-    r = StatsUtil.tTest(a.asarray(), b.asarray())
-    return r[0], r[1]
+    a = np.asanyarray(a)
+    b = np.asanyarray(b)
+    if a.ndim == 1 or axis is None:
+        r = StatsUtil.tTest(a._array, b._array, equal_var)
+        return r[0], r[1]
+    else:
+        r = StatsUtil.tTest(a._array, b._array, axis, equal_var)
+        return np.array(r[0]), np.array(r[1])


 def chisquare(f_obs, f_exp=None):
--- a/meteoinfo-math/src/main/java/org/meteoinfo/math/stats/StatsUtil.java
+++ b/meteoinfo-math/src/main/java/org/meteoinfo/math/stats/StatsUtil.java
@ -15,6 +15,7 @@ import org.apache.commons.math4.legacy.stat.correlation.KendallsCorrelation;
 import org.apache.commons.math4.legacy.stat.correlation.PearsonsCorrelation;
 import org.apache.commons.math4.legacy.stat.correlation.SpearmansCorrelation;
 import org.apache.commons.math4.legacy.stat.inference.InferenceTestUtils;
+import org.apache.commons.math4.legacy.stat.inference.TTest;
 import org.apache.commons.math4.legacy.stat.regression.OLSMultipleLinearRegression;
 import org.meteoinfo.ndarray.*;
 import org.meteoinfo.ndarray.math.ArrayMath;
@ -453,47 +454,282 @@ public class StatsUtil {
     * @param mu Expected value in null hypothesis
     * @return t_statistic and p_value
     */
-    public static double[] tTest(Array a, double mu){
-        double[] ad = (double[]) ArrayUtil.copyToNDJavaArray_Double(a);
+    public static double[] tTestOneSample(Array a, double mu){ // renamed from tTest(Array, double)
+        double[] ad = (double[]) a.get1DJavaArray(double.class); // sample values as a double[] for the inference helpers
        double s = InferenceTestUtils.t(mu, ad);
        double p = InferenceTestUtils.tTest(mu, ad);
        
        return new double[]{s, p};
    }
+
+    /**
+     * One sample t test computed along one axis of an n-dimensional array.
+     *
+     * <p>Every 1-D section of {@code a} taken along {@code axis} is tested against
+     * {@code mu}. The returned arrays have the shape of {@code a} with that axis
+     * removed.</p>
+     *
+     * @param a Sample a
+     * @param mu Expected value in null hypothesis
+     * @param axis Axis along which the test is computed; negative values count from the last axis
+     * @return t_statistic and p_value arrays
+     * @throws InvalidRangeException If a section range is invalid
+     * @throws IllegalArgumentException If axis is out of bounds for the rank of a
+     */
+    public static Array[] tTestOneSample(Array a, double mu, int axis) throws InvalidRangeException {
+        int[] aShape = a.getShape();
+        int rank = aShape.length;
+        // Accept negative axes and fail fast on an invalid one; the shape loop
+        // below would otherwise die with an ArrayIndexOutOfBoundsException
+        int ax = axis < 0 ? axis + rank : axis;
+        if (ax < 0 || ax >= rank) {
+            throw new IllegalArgumentException("axis " + axis
+                    + " is out of bounds for array of dimension " + rank);
+        }
+        // Result shape: the shape of a with the test axis removed
+        int[] shape = new int[rank - 1];
+        for (int i = 0, k = 0; i < rank; i++) {
+            if (i != ax) {
+                shape[k++] = aShape[i];
+            }
+        }
+        Array tStatistic = Array.factory(DataType.DOUBLE, shape);
+        Array pValue = Array.factory(DataType.DOUBLE, shape);
+        Index indexr = tStatistic.getIndex();
+        for (int i = 0; i < tStatistic.getSize(); i++) {
+            int[] current = indexr.getCurrentCounter();
+            // Full range along the test axis, a single position on every other axis
+            List<Range> aRanges = new ArrayList<>(rank);
+            for (int j = 0, k = 0; j < rank; j++) {
+                if (j == ax) {
+                    aRanges.add(new Range(0, aShape[j] - 1, 1));
+                } else {
+                    aRanges.add(new Range(current[k], current[k], 1));
+                    k++;
+                }
+            }
+            Array xx = ArrayMath.section(a, aRanges);
+            double[] rp = tTestOneSample(xx, mu);
+            tStatistic.setDouble(i, rp[0]);
+            pValue.setDouble(i, rp[1]);
+            indexr.incr();
+        }
+
+        return new Array[]{tStatistic, pValue};
+    }
+
+    /**
+     * One sample t test computed along one axis, with one expected value per test.
+     *
+     * <p>Every 1-D section of {@code a} taken along {@code axis} is tested against
+     * the element of {@code mu} at the same flat position as the result element.
+     * The returned arrays have the shape of {@code a} with that axis removed.</p>
+     *
+     * @param a Sample a
+     * @param mu Expected values in null hypothesis, one per result element
+     * @param axis Axis along which the test is computed; negative values count from the last axis
+     * @return t_statistic and p_value arrays
+     * @throws InvalidRangeException If a section range is invalid
+     * @throws IllegalArgumentException If axis is out of bounds for the rank of a,
+     *         or mu does not hold exactly one value per result element
+     */
+    public static Array[] tTestOneSample(Array a, Array mu, int axis) throws InvalidRangeException {
+        // mu is read by flat index below, so it is copied first if it is a view
+        mu = mu.copyIfView();
+        int[] aShape = a.getShape();
+        int rank = aShape.length;
+        // Accept negative axes and fail fast on an invalid one; the shape loop
+        // below would otherwise die with an ArrayIndexOutOfBoundsException
+        int ax = axis < 0 ? axis + rank : axis;
+        if (ax < 0 || ax >= rank) {
+            throw new IllegalArgumentException("axis " + axis
+                    + " is out of bounds for array of dimension " + rank);
+        }
+        // Result shape: the shape of a with the test axis removed
+        int[] shape = new int[rank - 1];
+        for (int i = 0, k = 0; i < rank; i++) {
+            if (i != ax) {
+                shape[k++] = aShape[i];
+            }
+        }
+        Array tStatistic = Array.factory(DataType.DOUBLE, shape);
+        Array pValue = Array.factory(DataType.DOUBLE, shape);
+        // A size mismatch would either overrun mu or silently pair tests with
+        // the wrong expected values
+        if (mu.getSize() != tStatistic.getSize()) {
+            throw new IllegalArgumentException("mu must hold " + tStatistic.getSize()
+                    + " values (one per test) but holds " + mu.getSize());
+        }
+        Index indexr = tStatistic.getIndex();
+        for (int i = 0; i < tStatistic.getSize(); i++) {
+            int[] current = indexr.getCurrentCounter();
+            // Full range along the test axis, a single position on every other axis
+            List<Range> aRanges = new ArrayList<>(rank);
+            for (int j = 0, k = 0; j < rank; j++) {
+                if (j == ax) {
+                    aRanges.add(new Range(0, aShape[j] - 1, 1));
+                } else {
+                    aRanges.add(new Range(current[k], current[k], 1));
+                    k++;
+                }
+            }
+            Array xx = ArrayMath.section(a, aRanges);
+            double[] rp = tTestOneSample(xx, mu.getDouble(i));
+            tStatistic.setDouble(i, rp[0]);
+            pValue.setDouble(i, rp[1]);
+            indexr.incr();
+        }
+
+        return new Array[]{tStatistic, pValue};
+    }
    
    /**
     * unpaired, two-sided, two-sample t-test.
     * 
-     * @param a Sample a.
-     * @param b Sample b.
+     * @param a Sample a (its values are read as one double[])
+     * @param b Sample b (its values are read as one double[])
     * @return t_statistic and p_value
     */
    public static double[] tTest(Array a, Array b) {
-        double[] ad = (double[]) ArrayUtil.copyToNDJavaArray_Double(a);
-        double[] bd = (double[]) ArrayUtil.copyToNDJavaArray_Double(b);
+        double[] ad = (double[]) a.get1DJavaArray(double.class); // sample a as a double[] for the inference helpers
+        double[] bd = (double[]) b.get1DJavaArray(double.class); // NOTE(review): t/tTest below look like the unequal-variance (Welch) forms - confirm against Commons Math docs
        double s = InferenceTestUtils.t(ad, bd);
        double p = InferenceTestUtils.tTest(ad, bd);
        
        return new double[]{s, p};
    }
+
+    /**
+     * Unpaired, two-sided, two-sample t-test with a selectable variance assumption.
+     *
+     * @param a Sample a
+     * @param b Sample b
+     * @param equalVariance If true use the homoscedastic (equal variance) test,
+     *                      otherwise use the test that does not assume equal variances
+     * @return t_statistic and p_value
+     */
+    public static double[] tTest(Array a, Array b, boolean equalVariance) {
+        final double[] sampleA = (double[]) a.get1DJavaArray(double.class);
+        final double[] sampleB = (double[]) b.get1DJavaArray(double.class);
+        final TTest test = new TTest();
+        if (equalVariance) {
+            // Equal population variances assumed
+            return new double[]{
+                    test.homoscedasticT(sampleA, sampleB),
+                    test.homoscedasticTTest(sampleA, sampleB)};
+        }
+        // Welch's t-test
+        return new double[]{
+                test.t(sampleA, sampleB),
+                test.tTest(sampleA, sampleB)};
+    }
+
+    /**
+     * Unpaired, two-sided, two-sample t-test computed along one axis.
+     *
+     * <p>Each pair of 1-D sections of {@code a} and {@code b} taken along
+     * {@code axis} is tested with {@link #tTest(Array, Array, boolean)}. The
+     * returned arrays have the shape of {@code a} with that axis removed.</p>
+     *
+     * @param a Sample a
+     * @param b Sample b; it must have the same number of dimensions as a
+     * @param axis Axis along which the test is computed; negative values count from the last axis
+     * @param equalVariance Equal variance or not
+     * @return t_statistic and p_value arrays
+     * @throws InvalidRangeException If a section range is invalid
+     * @throws IllegalArgumentException If a and b differ in rank, or axis is out of bounds
+     */
+    public static Array[] tTest(Array a, Array b, int axis, boolean equalVariance) throws InvalidRangeException {
+        int[] aShape = a.getShape();
+        int[] bShape = b.getShape();
+        int rank = aShape.length;
+        if (bShape.length != rank) {
+            throw new IllegalArgumentException("a and b must have the same number of dimensions: "
+                    + rank + " != " + bShape.length);
+        }
+        // Accept negative axes and fail fast on an invalid one; the shape loop
+        // below would otherwise die with an ArrayIndexOutOfBoundsException
+        int ax = axis < 0 ? axis + rank : axis;
+        if (ax < 0 || ax >= rank) {
+            throw new IllegalArgumentException("axis " + axis
+                    + " is out of bounds for array of dimension " + rank);
+        }
+        // Result shape: the shape of a with the test axis removed
+        int[] shape = new int[rank - 1];
+        for (int i = 0, k = 0; i < rank; i++) {
+            if (i != ax) {
+                shape[k++] = aShape[i];
+            }
+        }
+        Array tStatistic = Array.factory(DataType.DOUBLE, shape);
+        Array pValue = Array.factory(DataType.DOUBLE, shape);
+        Index indexr = tStatistic.getIndex();
+        for (int i = 0; i < tStatistic.getSize(); i++) {
+            int[] current = indexr.getCurrentCounter();
+            // Full range along the test axis (each sample keeps its own length),
+            // a single position on every other axis
+            List<Range> aRanges = new ArrayList<>(rank);
+            List<Range> bRanges = new ArrayList<>(rank);
+            for (int j = 0, k = 0; j < rank; j++) {
+                if (j == ax) {
+                    aRanges.add(new Range(0, aShape[j] - 1, 1));
+                    bRanges.add(new Range(0, bShape[j] - 1, 1));
+                } else {
+                    aRanges.add(new Range(current[k], current[k], 1));
+                    bRanges.add(new Range(current[k], current[k], 1));
+                    k++;
+                }
+            }
+            Array xx = ArrayMath.section(a, aRanges);
+            Array yy = ArrayMath.section(b, bRanges);
+            double[] rp = tTest(xx, yy, equalVariance);
+            tStatistic.setDouble(i, rp[0]);
+            pValue.setDouble(i, rp[1]);
+            indexr.incr();
+        }
+
+        return new Array[]{tStatistic, pValue};
+    }
    
    /**
     * Paired test evaluating the null hypothesis that the mean difference 
     * between corresponding (paired) elements of the double[] arrays sample1 
     * and sample2 is zero.
     * 
-     * @param a Sample a.
-     * @param b Sample b.
+     * @param a Sample a (its values are read as one double[])
+     * @param b Sample b (its values are read as one double[], paired with a by position)
     * @return t_statistic and p_value
     */
    public static double[] pairedTTest(Array a, Array b) {
-        double[] ad = (double[]) ArrayUtil.copyToNDJavaArray_Double(a);
-        double[] bd = (double[]) ArrayUtil.copyToNDJavaArray_Double(b);
+        double[] ad = (double[]) a.get1DJavaArray(double.class); // sample a as a double[] for the inference helpers
+        double[] bd = (double[]) b.get1DJavaArray(double.class); // sample b as a double[] for the inference helpers
        double s = InferenceTestUtils.pairedT(ad, bd);
        double p = InferenceTestUtils.pairedTTest(ad, bd);
        
        return new double[]{s, p};
    }
+
+    /**
+     * Paired t-test computed along one axis of two n-dimensional arrays.
+     *
+     * <p>Each pair of 1-D sections of {@code a} and {@code b} taken along
+     * {@code axis} is tested with {@link #pairedTTest(Array, Array)}, which
+     * evaluates the null hypothesis that the mean difference between paired
+     * elements is zero. The returned arrays have the shape of {@code a} with
+     * that axis removed.</p>
+     *
+     * @param a Sample a
+     * @param b Sample b; it must have the same number of dimensions as a
+     * @param axis Axis along which the test is computed; negative values count from the last axis
+     * @return t_statistic and p_value arrays
+     * @throws InvalidRangeException If a section range is invalid
+     * @throws IllegalArgumentException If a and b differ in rank, or axis is out of bounds
+     */
+    public static Array[] pairedTTest(Array a, Array b, int axis) throws InvalidRangeException {
+        int[] aShape = a.getShape();
+        int[] bShape = b.getShape();
+        int rank = aShape.length;
+        if (bShape.length != rank) {
+            throw new IllegalArgumentException("a and b must have the same number of dimensions: "
+                    + rank + " != " + bShape.length);
+        }
+        // Accept negative axes and fail fast on an invalid one; the shape loop
+        // below would otherwise die with an ArrayIndexOutOfBoundsException
+        int ax = axis < 0 ? axis + rank : axis;
+        if (ax < 0 || ax >= rank) {
+            throw new IllegalArgumentException("axis " + axis
+                    + " is out of bounds for array of dimension " + rank);
+        }
+        // Result shape: the shape of a with the test axis removed
+        int[] shape = new int[rank - 1];
+        for (int i = 0, k = 0; i < rank; i++) {
+            if (i != ax) {
+                shape[k++] = aShape[i];
+            }
+        }
+        Array tStatistic = Array.factory(DataType.DOUBLE, shape);
+        Array pValue = Array.factory(DataType.DOUBLE, shape);
+        Index indexr = tStatistic.getIndex();
+        for (int i = 0; i < tStatistic.getSize(); i++) {
+            int[] current = indexr.getCurrentCounter();
+            // Full range along the test axis, a single position on every other axis
+            List<Range> aRanges = new ArrayList<>(rank);
+            List<Range> bRanges = new ArrayList<>(rank);
+            for (int j = 0, k = 0; j < rank; j++) {
+                if (j == ax) {
+                    aRanges.add(new Range(0, aShape[j] - 1, 1));
+                    bRanges.add(new Range(0, bShape[j] - 1, 1));
+                } else {
+                    aRanges.add(new Range(current[k], current[k], 1));
+                    bRanges.add(new Range(current[k], current[k], 1));
+                    k++;
+                }
+            }
+            Array xx = ArrayMath.section(a, aRanges);
+            Array yy = ArrayMath.section(b, bRanges);
+            double[] rp = pairedTTest(xx, yy);
+            tStatistic.setDouble(i, rp[0]);
+            pValue.setDouble(i, rp[1]);
+            indexr.incr();
+        }
+
+        return new Array[]{tStatistic, pValue};
+    }
    
    /**
     * Chi-square test
@ -503,8 +739,8 @@ public class StatsUtil {
     * @return Chi-square_statistic and p_value
     */
    public static double[] chiSquareTest(Array e, Array o) {
-        double[] ed = (double[]) ArrayUtil.copyToNDJavaArray_Double(e);
-        long[] od = (long[]) ArrayUtil.copyToNDJavaArray_Long(o);
+        double[] ed = (double[]) e.get1DJavaArray(double.class);
+        long[] od = (long[]) o.get1DJavaArray(double.class);
        double s = InferenceTestUtils.chiSquare(ed, od);
        double p = InferenceTestUtils.chiSquareTest(ed, od);