Support npy and npz load and save functions

This commit is contained in:
wyq 2023-12-22 17:09:25 +08:00
parent 21b0e80ffd
commit 087a811810
48 changed files with 5521 additions and 10 deletions

View File

@ -39,6 +39,7 @@ import java.time.LocalDateTime;
import java.time.Duration;
import java.util.ArrayList;
import org.meteoinfo.data.meteodata.numpy.NumpyDataInfo;
import org.meteoinfo.data.meteodata.radar.CMARadarBaseDataInfo;
import org.meteoinfo.ndarray.math.ArrayMath;
import org.meteoinfo.projection.ProjectionInfo;
@ -884,6 +885,17 @@ public class MeteoDataInfo {
_infoText = dataInfo.generateInfoText();
}
/**
 * Open a numpy data file: a fresh {@code NumpyDataInfo} becomes the current
 * data info, reads the file, and the info text is regenerated from it.
 *
 * @param fileName File path
 */
public void openNumpyData(String fileName) {
    this.dataInfo = new NumpyDataInfo();
    this.dataInfo.readDataInfo(fileName);
    this._infoText = this.dataInfo.generateInfoText();
}
/**
* Open MICAPS data
*

View File

@ -0,0 +1,130 @@
package org.meteoinfo.data.meteodata.numpy;
import org.meteoinfo.data.GridArray;
import org.meteoinfo.data.GridData;
import org.meteoinfo.data.dimarray.Dimension;
import org.meteoinfo.data.meteodata.Attribute;
import org.meteoinfo.data.meteodata.DataInfo;
import org.meteoinfo.data.meteodata.IGridDataInfo;
import org.meteoinfo.data.meteodata.Variable;
import org.meteoinfo.ndarray.Array;
import org.meteoinfo.ndarray.InvalidRangeException;
import org.meteoinfo.ndarray.io.npy.Npy;
import org.meteoinfo.ndarray.io.npy.NpyArray;
import org.meteoinfo.ndarray.io.npy.NpyUtil;
import java.io.*;
/**
 * Data info for a NumPy {@code .npy} file.
 *
 * <p>{@link #readDataInfo(String)} loads the whole array and stores it as the
 * cached data of a single variable named {@code "a"}; the {@code read}
 * methods then serve sections of that cached array. All
 * {@code getGridData_*} accessors and {@link #getGridArray(String)} are
 * unimplemented and return {@code null}.</p>
 */
public class NumpyDataInfo extends DataInfo implements IGridDataInfo {

    /**
     * File validation is not implemented for this format.
     *
     * @param raf the file to check (unused)
     * @return always {@code false}
     */
    @Override
    public boolean isValidFile(RandomAccessFile raf) {
        return false;
    }

    /**
     * Read the NPY file and register one variable (named {@code "a"}) together
     * with one dimension per axis of the array, named {@code dim_a_<axis>}.
     *
     * @param fileName path of the NPY file
     */
    @Override
    public void readDataInfo(String fileName) {
        this.setFileName(fileName);

        File file = new File(fileName);
        NpyArray npyArray = Npy.read(file);
        Array array = NpyUtil.toMIArray(npyArray);
        this.addAttribute(new Attribute("File type", "Numpy"));

        int[] shape = npyArray.shape();
        String name = "a";
        Variable variable = new Variable();
        variable.setName(name);
        variable.setDataType(NpyUtil.toMIDataType(npyArray.dataType()));
        // The data is kept in memory; read(...) never goes back to the file.
        variable.setCachedData(array);
        for (int i = 0; i < shape.length; i++) {
            Dimension dim = new Dimension();
            dim.setName("dim_" + name + "_" + i);
            dim.setLength(shape[i]);
            this.addDimension(dim);
            variable.addDimension(dim);
        }
        this.addVariable(variable);
    }

    /**
     * Read the complete data of a variable.
     *
     * @param varName variable name
     * @return a copy of the variable data, or {@code null} if the variable
     *         is not found or the section could not be taken
     */
    @Override
    public Array read(String varName) {
        Variable var = this.getVariable(varName);
        if (var == null) {
            // Presumably getVariable yields null for an unknown name (confirm);
            // answer with null like the invalid-range path instead of an NPE.
            return null;
        }

        int n = var.getDimNumber();
        int[] origin = new int[n];   // zero-initialised: start of every axis
        int[] size = new int[n];
        int[] stride = new int[n];
        for (int i = 0; i < n; i++) {
            size[i] = var.getDimLength(i);
            stride[i] = 1;
        }

        return read(varName, origin, size, stride);
    }

    /**
     * Read a section of a variable from its cached array.
     *
     * @param varName variable name
     * @param origin  start index per dimension
     * @param size    number of elements per dimension
     * @param stride  step per dimension
     * @return a copy of the requested section, or {@code null} if the variable
     *         is not found or the requested range is invalid
     */
    @Override
    public Array read(String varName, int[] origin, int[] size, int[] stride) {
        Variable variable = this.getVariable(varName);
        if (variable == null) {
            return null;
        }

        try {
            return variable.getCachedData().section(origin, size, stride).copy();
        } catch (InvalidRangeException e) {
            // Existing contract: an invalid range is reported as null.
            return null;
        }
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridArray getGridArray(String varName) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_LonLat(int timeIdx, String varName, int levelIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_TimeLat(int lonIdx, String varName, int levelIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_TimeLon(int latIdx, String varName, int levelIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_LevelLat(int lonIdx, String varName, int timeIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_LevelLon(int latIdx, String varName, int timeIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_LevelTime(int latIdx, String varName, int lonIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_Time(int lonIdx, int latIdx, String varName, int levelIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_Level(int lonIdx, int latIdx, String varName, int timeIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_Lon(int timeIdx, int latIdx, String varName, int levelIdx) {
        return null;
    }

    /** Not implemented; always returns {@code null}. */
    @Override
    public GridData getGridData_Lat(int timeIdx, int lonIdx, String varName, int levelIdx) {
        return null;
    }
}

View File

@ -1,7 +1,6 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<MeteoInfo File="milconfig.xml" Type="configurefile">
<Path OpenPath="D:\Working\MIScript\Jython\mis\io\matlab">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\contour"/>
<Path OpenPath="D:\Working\MIScript\Jython\mis\io\numpy">
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\city"/>
@ -12,21 +11,26 @@
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\LaSW"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\LaSW\airship"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\text"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\io"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\array"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\io\matlab"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\io"/>
<RecentFolder Folder="D:\Working\MIScript\Jython\mis\io\numpy"/>
</Path>
<File>
<OpenedFiles>
<OpenedFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\plot\plot3_multi_color_chinese.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\io\matlab\test_mfl_lizard_1.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\io\matlab\mat_lizard_1.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\io\numpy\test_npy_1.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\io\numpy\load_2.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\io\numpy\load_npz_2.py"/>
<OpenedFile File="D:\Working\MIScript\Jython\mis\io\numpy\savez_1.py"/>
</OpenedFiles>
<RecentFiles>
<RecentFile File="D:\Working\MIScript\Jython\mis\plot_types\3d\jogl\plot\plot3_multi_color_chinese.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\io\matlab\test_mfl_lizard_1.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\io\matlab\mat_lizard_1.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\io\numpy\test_npy_1.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\io\numpy\load_2.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\io\numpy\load_npz_2.py"/>
<RecentFile File="D:\Working\MIScript\Jython\mis\io\numpy\savez_1.py"/>
</RecentFiles>
</File>
<Font>
@ -34,5 +38,5 @@
</Font>
<LookFeel DockWindowDecorated="true" LafDecorated="true" Name="FlatDarkLaf"/>
<Figure DoubleBuffering="true"/>
<Startup MainFormLocation="-7,0" MainFormSize="1405,788"/>
<Startup MainFormLocation="-7,0" MainFormSize="1420,834"/>
</MeteoInfo>

View File

@ -32,7 +32,7 @@ import mipylib.migl as migl
__all__ = [
'addfile','addfiles','addfile_arl','addfile_ascii_grid','addfile_awx','addfile_geotiff',
'addfile_grads','addfile_hyconc','addfile_hytraj','addfile_hypart','addfile_lonlat',
'addfile_matlab','addfile_micaps','addfile_mm5','addfile_nc','addfile_grib','addfile_surfer',
'addfile_matlab','addfile_micaps','addfile_mm5','addfile_nc','addfile_numpy','addfile_grib','addfile_surfer',
'add_bufr_lookup', 'addtimedim','joinncfile','asciiread','asciiwrite','bincreate','binread',
'binwrite', 'numasciicol','numasciirow','readtable','convert2nc','grads2nc','ncwrite'
]
@ -285,6 +285,22 @@ def addfile_matlab(fname, getfn=True):
meteodata.openMatLabData(fname)
datafile = DimDataFile(meteodata)
return datafile
def addfile_numpy(fname, getfn=True):
    """
    Open a numpy data file as a dimension data file.

    :param fname: (*string*) The numpy file name.
    :param getfn: (*bool*) Whether to pass ``fname`` through ``__getfilename``
        before opening. Default is ``True``.

    :returns: (*DimDataFile*) Opened file object.
    """
    if getfn:
        # The second element of the returned pair is not needed here.
        fname, _ = __getfilename(fname)

    info = MeteoDataInfo()
    info.openNumpyData(fname)
    return DimDataFile(info)
def addfile_lonlat(fname, getfn=True, missingv=-9999.0):
"""

View File

@ -9,6 +9,7 @@ from .index_tricks import *
from .stride_tricks import *
from .type_check import *
from .arraysetops import *
from npyio import *
__all__ = []
__all__ += shape_base.__all__
@ -16,4 +17,5 @@ __all__ += function_base.__all__
__all__ += index_tricks.__all__
__all__ += stride_tricks.__all__
__all__ += type_check.__all__
__all__ += arraysetops.__all__
__all__ += arraysetops.__all__
__all__ += npyio.__all__

View File

@ -0,0 +1,165 @@
from org.meteoinfo.ndarray.io.npy import Npy, Npz
from ..core._ndarray import NDArray
__all__ = ['load', 'save', 'savez']
class NpzFile:
    """
    NpzFile(file)

    A dictionary-like object giving access to the arrays stored in a
    NumPy ``.npz`` archive. Arrays are loaded on demand when indexed.

    Entries whose name ends with ``.npy`` are listed without that
    extension; any other entry is listed under its full name.

    Attributes
    ----------
    files : list of str
        Array names: archive entry names with a trailing ``.npy`` removed.
    zip_file : object
        Handle returned by ``Npz.open`` for the archive, ``None`` once closed.
    """
    zip_file = None
    _MAX_REPR_ARRAY_COUNT = 5

    def __init__(self, file):
        self.filename = file
        self.zip_file = Npz.open(file)
        # Raw entry names as stored in the archive.
        self._files = list(Npz.entries(self.zip_file))
        # User-facing names: strip the 4-character '.npy' suffix when present.
        self.files = [name[:-4] if name.endswith('.npy') else name
                      for name in self._files]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the zip file.
        """
        if self.zip_file is None:
            return
        self.zip_file.close()
        self.zip_file = None

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __contains__(self, key):
        # Accept both the raw entry name and the name without '.npy'.
        return key in self._files or key in self.files

    def __repr__(self):
        # Show at most _MAX_REPR_ARRAY_COUNT array names.
        shown = self.files[:self._MAX_REPR_ARRAY_COUNT]
        array_names = ', '.join(shown)
        if len(self.files) > self._MAX_REPR_ARRAY_COUNT:
            array_names += "..."
        return "NpzFile {} with keys: {}".format(self.filename, array_names)

    def __getitem__(self, key):
        # Raw entry names take precedence over stripped names.
        if key in self._files:
            entry = key
        elif key in self.files:
            entry = key + '.npy'
        else:
            raise KeyError("{} is not a file in the archive".format(key))

        return NDArray(Npz.load(self.zip_file, entry))
def load(file):
    """
    Load arrays from `npy` or `npz` data file.

    :param file: (*str*) Data file path.

    :return: Array for a ``npy`` file, or a dictionary-like ``NpzFile`` of
        arrays for a ``npz`` file.

    :raises EOFError: If the file is empty.
    """
    # Signatures used to tell NumPy binary files from zip archives.
    MAGIC_PREFIX = b'\x93NUMPY'
    _ZIP_PREFIX = b'PK\x03\x04'
    _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this

    # Only the leading bytes are needed to detect the format. The with-block
    # closes the handle even when the read itself fails.
    with open(file, 'rb') as fid:
        magic = fid.read(len(MAGIC_PREFIX))

    if not magic:
        raise EOFError("No data left in file")

    if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
        return NpzFile(file)
    else:
        a = Npy.load(file)
        return NDArray(a)
def save(file, arr):
    """
    Write one array to a binary file in NumPy ``.npy`` format.

    The ``.npy`` extension is appended to the path when it is missing.

    :param file: (*str*) Npy file path.
    :param arr: (*array*) Array data to be saved.
    """
    target = file if file.endswith('.npy') else file + '.npy'
    Npy.save(target, arr._array)
def savez(file, *args, **kwds):
    """
    Save several arrays into a single file in uncompressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e., ``savez(fn,
    x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : str
        The filename (string) where the data will be saved. The ``.npz``
        extension will be appended to the filename if it is not already
        there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays. Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Raises
    ------
    ValueError
        If a positional array would get a name (``arr_<i>``) that is also
        used as a keyword.
    """
    if not file.endswith('.npz'):
        file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_{}'.format(i)
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword {}".format(key))
        namedict[key] = val

    outstream = Npz.create(file)
    try:
        for key, val in namedict.items():
            # Each array is stored as '<name>.npy', the entry naming of the
            # NPZ format. NOTE(review): assumes Npz.write uses the given entry
            # name verbatim - confirm against Npz.write.
            fname = key + '.npy'
            Npz.write(outstream, fname, val._array)
    finally:
        # Release the archive even if writing one of the arrays fails.
        outstream.close()

View File

@ -0,0 +1,36 @@
package org.meteoinfo.ndarray.io.npy;
import java.util.Arrays;
import java.util.Objects;
/**
 * Base class of the NPY array implementations: holds the shape, the data
 * object and the storage-order flag, and implements the matching accessors.
 *
 * @param <T> type of the data object
 */
abstract class AbstractNpyArray<T> implements NpyArray<T> {

    protected final int[] shape;
    protected final T data;
    protected final boolean fortranOrder;

    /**
     * @param shape        the array shape, must not be {@code null}
     * @param data         the data object, must not be {@code null}
     * @param fortranOrder value reported by {@link #hasColumnOrder()}
     * @throws NullPointerException if {@code shape} or {@code data} is {@code null}
     */
    protected AbstractNpyArray(int[] shape, T data, boolean fortranOrder) {
        // Same check order as always: shape first, then data.
        if (shape == null) {
            throw new NullPointerException();
        }
        if (data == null) {
            throw new NullPointerException();
        }
        this.shape = shape;
        this.data = data;
        this.fortranOrder = fortranOrder;
    }

    /** Returns the shape array itself (not a copy). */
    @Override
    public final int[] shape() {
        return this.shape;
    }

    @Override
    public final boolean hasColumnOrder() {
        return this.fortranOrder;
    }

    /** Returns the data object itself (not a copy). */
    @Override
    public T data() {
        return this.data;
    }

    /** Returns a fresh copy of the shape array. */
    protected final int[] copyShape() {
        return this.shape.clone();
    }
}

View File

@ -0,0 +1,416 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.io.npy.dict.NpyHeaderDict;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.Arrays;
public final class Array2d {
private Array2d() {
}
/**
* Checks if the given array is a valid 2-dimensional array. We do not check
* this in the other utility methods of this class. So if you are not sure
* if an array is a 2D array you should call this method to check this before
* calling the other methods of this class.
*
* @param array the NPY array to check
* @return {@code true} if the array has 2 dimensions and each dimensions is
* {@code > 1}.
*/
public static boolean isValid(NpyArray<?> array) {
if (array == null || array.shape() == null)
return false;
int[] shape = array.shape();
int rowCount = shape[0];
int colCount = shape[1];
return rowCount > 0
&& colCount > 0
&& rowCount * colCount == array.size();
}
public static int rowCountOf(NpyArray<?> array) {
return array.shape()[0];
}
public static int columnCountOf(NpyArray<?> array) {
return array.shape()[1];
}
public static int indexOf(NpyArray<?> array, int row, int col) {
if (array.hasColumnOrder()) {
int rows = array.shape()[0];
return col * rows + row;
} else {
int cols = array.shape()[1];
return row * cols + col;
}
}
public static boolean get(NpyBooleanArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static boolean[] getRow(NpyBooleanArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
boolean[] values = new boolean[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static boolean[] getColumn(NpyBooleanArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
boolean[] values = new boolean[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static byte get(NpyByteArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static byte[] getRow(NpyByteArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
byte[] values = new byte[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static byte[] getColumn(NpyByteArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
byte[] values = new byte[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static double get(NpyDoubleArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static double[] getRow(NpyDoubleArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
double[] values = new double[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static double[] getColumn(NpyDoubleArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
double[] values = new double[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static float get(NpyFloatArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static float[] getRow(NpyFloatArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
float[] values = new float[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static float[] getColumn(NpyFloatArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
float[] values = new float[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static int get(NpyIntArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static int[] getRow(NpyIntArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
int[] values = new int[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static int[] getColumn(NpyIntArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
int[] values = new int[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static long get(NpyLongArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static long[] getRow(NpyLongArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
long[] values = new long[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static long[] getColumn(NpyLongArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
long[] values = new long[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static short get(NpyShortArray array, int row, int col) {
int i = indexOf(array, row, col);
return array.data[i];
}
public static short[] getRow(NpyShortArray array, int row) {
int cols = array.shape[1];
if (!array.hasColumnOrder()) {
int offset = row * cols;
return Arrays.copyOfRange(array.data, offset, offset + cols);
}
int rows = array.shape[0];
short[] values = new short[cols];
int offset = 0;
for (int col = 0; col < cols; col++) {
values[col] = array.data[offset + row];
offset += rows;
}
return values;
}
public static short[] getColumn(NpyShortArray array, int col) {
int rows = array.shape[0];
if (array.hasColumnOrder()) {
int offset = col * rows;
return Arrays.copyOfRange(array.data, offset, offset + rows);
}
int cols = array.shape[1];
short[] values = new short[rows];
int offset = 0;
for (int row = 0; row < rows; row++) {
values[row] = array.data[offset + col];
offset += cols;
}
return values;
}
public static NpyArray<?> readRow(File file, int row) {
try (RandomAccessFile raf = new RandomAccessFile(file, "r");
java.nio.channels.FileChannel channel = raf.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return readRow(raf, header, row);
} catch (IOException e) {
throw new RuntimeException(
"failed to read a row " + row + " from NPY file " + file, e);
}
}
public static NpyArray<?> readRow(
RandomAccessFile file, NpyHeader header, int row) {
NpyHeaderDict dict = header.dict();
int rows = dict.sizeOfDimension(0);
int columns = dict.sizeOfDimension(1);
return dict.hasFortranOrder()
? Npy.readElements(file, header, columns, row, rows)
: Npy.readRange(file, header, columns, row * columns);
}
public static NpyArray<?> readColumn(File file, int column) {
try (RandomAccessFile raf = new RandomAccessFile(file, "r");
java.nio.channels.FileChannel channel = raf.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return readColumn(raf, header, column);
} catch (IOException e) {
throw new RuntimeException(
"failed to read a column " + column + " from NPY file " + file, e);
}
}
public static NpyArray<?> readColumn(
RandomAccessFile file, NpyHeader header, int column) {
NpyHeaderDict dict = header.dict();
int rows = dict.sizeOfDimension(0);
int columns = dict.sizeOfDimension(1);
return dict.hasFortranOrder()
? Npy.readRange(file, header, rows, column * rows)
: Npy.readElements(file, header, rows, column, columns);
}
public static <T extends NpyArray<?>> T switchOrder(T array) {
return OrderSwitch2d.of(array);
}
/**
* Read the diagonal of the 2d-array (matrix) stored in the given NPY file.
*
* @param file the NPY file
* @return the diagonal of the matrix
*/
public static NpyArray<?> readDiag(File file) {
try (RandomAccessFile raf = new RandomAccessFile(file, "r");
java.nio.channels.FileChannel channel = raf.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return readDiag(raf, header);
} catch (IOException e) {
throw new RuntimeException(
"failed to read diagonal from NPY file " + file, e);
}
}
public static NpyArray<?> readDiag(RandomAccessFile file, NpyHeader header) {
NpyHeaderDict dict = header.dict();
int elemSize = dict.dataType() == NpyDataType.U
? 4
: Math.max(dict.dataType().size(), 1);
int rows = dict.sizeOfDimension(0);
int cols = dict.sizeOfDimension(1);
int n = Math.min(rows, cols);
if (n < 1)
throw new IndexOutOfBoundsException(String.valueOf(n));
try {
NpyArrayReader reader = NpyArrayReader.of(Npy.shape1d(dict, n));
ByteBuffer buffer = ByteBuffer.allocate(elemSize);
buffer.order(header.byteOrder());
java.nio.channels.FileChannel channel = file.getChannel();
long pos = header.dataOffset();
long seekDist = dict.hasFortranOrder()
? (long) (rows + 1) * elemSize
: (long) (cols + 1) * elemSize;
for (int i = 0; i < n; i++) {
file.seek(pos);
channel.read(buffer);
buffer.flip();
reader.readNextFrom(buffer);
pos += seekDist;
buffer.clear();
}
return reader.finish();
} catch (IOException e) {
throw new RuntimeException(
"failed to read diagonal from NPY file: " + file, e);
}
}
}

View File

@ -0,0 +1,49 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.Array;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
/**
 * Reads the data section of an NPY stream from a channel into an
 * {@link Array}, using a bounded intermediate buffer.
 */
class ArrayChannelReader {

    /** Upper bound of the intermediate buffer size in bytes. */
    private static final int MAX_BUFFER_SIZE = 8 * 1024;

    private final ReadableByteChannel channel;
    private final NpyHeader header;

    private ArrayChannelReader(ReadableByteChannel channel, NpyHeader header) {
        this.channel = channel;
        this.header = header;
    }

    /**
     * Reads the array described by the header from the channel.
     *
     * @param channel the channel, positioned at the start of the array data
     * @param header  the already parsed NPY header
     * @return the array that was read
     * @throws IOException        if reading from the channel fails
     * @throws NpyFormatException if the data type is not supported
     */
    static Array read(ReadableByteChannel channel, NpyHeader header)
        throws IOException, NpyFormatException {
        return new ArrayChannelReader(channel, header).read();
    }

    private Array read() throws IOException, NpyFormatException {
        long totalBytes = header.dict().dataSize();
        // Small arrays get a buffer of exactly their size.
        int bufferSize = totalBytes > 0 && totalBytes < ((long) MAX_BUFFER_SIZE)
            ? (int) totalBytes
            : MAX_BUFFER_SIZE;

        ArrayReader builder = ArrayReader.of(header.dict());
        ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
        buffer.order(header.byteOrder());
        long readBytes = 0;
        while (readBytes < totalBytes) {
            int n = channel.read(buffer);
            if (n <= 0)
                break;
            buffer.flip();
            builder.readAllFrom(buffer);
            // readAllFrom only consumes complete elements. A read may end in
            // the middle of an element, so keep the unread tail (compact)
            // instead of discarding it (clear).
            buffer.compact();
            readBytes += n;
        }
        return builder.finish();
    }
}

View File

@ -0,0 +1,310 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.Array;
import org.meteoinfo.ndarray.io.npy.dict.NpyHeaderDict;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.function.ToIntFunction;
import java.util.function.ToLongFunction;
/**
 * Incrementally decodes the elements of an NPY data section from byte
 * buffers and finally wraps them into an {@link Array}. There is one nested
 * builder per supported NPY data type; use {@link #of(NpyHeaderDict)} to get
 * the matching one.
 */
abstract class ArrayReader {

    protected final NpyHeaderDict dict;
    /**
     * Number of elements to decode. For the string types {@code S} and
     * {@code U} this is the type size from the header, otherwise the number
     * of array elements.
     */
    protected final int elementCount;
    /** Bytes per element: 1 for {@code S}, 4 for {@code U}, else the type size. */
    private final int elementSize;
    /** Index of the next element to decode. */
    private int pos;

    private ArrayReader(NpyHeaderDict dict) {
        this.dict = dict;
        NpyDataType type = dict.dataType();
        this.elementCount = type == NpyDataType.S || type == NpyDataType.U
            ? dict.typeSize()
            : dict.numberOfElements();
        this.elementSize = type == NpyDataType.S ? 1
            : type == NpyDataType.U ? 4 : type.size();
        this.pos = 0;
    }

    /**
     * Creates the reader for the data type of the given header dictionary.
     *
     * @param dict the header dictionary of the NPY data
     * @return a new reader for that data type
     * @throws NpyFormatException if the data type is not supported
     */
    static ArrayReader of(NpyHeaderDict dict) throws NpyFormatException {
        switch (dict.dataType()) {
            case bool:
                return new BooleanBuilder(dict);
            case f2:
                return new FloatBuilder(dict, NpyUtil::f2ToFloat);
            case f4:
                return new FloatBuilder(dict, ByteBuffer::getFloat);
            case f8:
                return new DoubleBuilder(dict);
            case i1:
                return new ByteBuilder(dict);
            case i2:
                return new ShortBuilder(dict, ByteBuffer::getShort);
            case i4:
                return new IntBuilder(dict, ByteBuffer::getInt);
            case i8:
                return new LongBuilder(dict, ByteBuffer::getLong);
            // Unsigned types are widened to the next larger signed type.
            case u1:
                return new ShortBuilder(dict, NpyUtil::u1ToShort);
            case u2:
                return new IntBuilder(dict, NpyUtil::u2ToInt);
            case u4:
                return new LongBuilder(dict, NpyUtil::u4ToLong);
            case u8:
                return new BigIntBuilder(dict);
            case S:
                return new AsciiBuilder(dict);
            case U:
                return new UnicodeBuilder(dict);
            default:
                throw new NpyFormatException(
                    "unsupported data type: " + dict.dataType());
        }
    }

    /**
     * Decodes as many complete elements as the buffer holds, stopping once
     * all {@code elementCount} elements were read. Bytes of an incomplete
     * trailing element are left in the buffer.
     */
    final void readAllFrom(ByteBuffer buffer) {
        while (pos != elementCount && buffer.remaining() >= elementSize) {
            nextInto(buffer, pos);
            pos++;
        }
    }

    /** Decodes exactly one element from the buffer, without any checks. */
    final void readNextFrom(ByteBuffer buffer) {
        nextInto(buffer, pos);
        pos++;
    }

    /** Decodes one element from the buffer and stores it at {@code pos}. */
    abstract void nextInto(ByteBuffer buffer, int pos);

    /** Returns the decoded elements as a Java array. */
    abstract Object getData();

    /** Wraps the decoded data into an {@link Array} of the header's shape. */
    Array finish() {
        return Array.factory(NpyUtil.toMIDataType(dict.dataType()), dict.shape(), getData());
    }

    private static final class BooleanBuilder extends ArrayReader {

        private final boolean[] data;

        private BooleanBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new boolean[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            // any non-zero byte counts as true
            data[pos] = buffer.get() != 0;
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class ByteBuilder extends ArrayReader {

        private final byte[] data;

        private ByteBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new byte[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.get();
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class DoubleBuilder extends ArrayReader {

        private final double[] data;

        private DoubleBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new double[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.getDouble();
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class FloatBuilder extends ArrayReader {

        private final float[] data;
        private final ToFloatFunction<ByteBuffer> fn;

        private FloatBuilder(NpyHeaderDict dict, ToFloatFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new float[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsFloat(buffer);
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class IntBuilder extends ArrayReader {

        private final int[] data;
        private final ToIntFunction<ByteBuffer> fn;

        private IntBuilder(NpyHeaderDict dict, ToIntFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new int[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsInt(buffer);
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class ShortBuilder extends ArrayReader {

        private final short[] data;
        private final ToShortFunction<ByteBuffer> fn;

        private ShortBuilder(NpyHeaderDict dict, ToShortFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new short[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsShort(buffer);
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class LongBuilder extends ArrayReader {

        private final long[] data;
        private final ToLongFunction<ByteBuffer> fn;

        private LongBuilder(NpyHeaderDict dict, ToLongFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new long[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsLong(buffer);
        }

        @Override
        Object getData() {
            return data;
        }
    }

    private static final class BigIntBuilder extends ArrayReader {

        private final BigInteger[] data;

        private BigIntBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new BigInteger[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = NpyUtil.u8ToBigInteger(buffer);
        }

        @Override
        Object getData() {
            return data;
        }
    }

    /**
     * Decodes a single-byte string: one character per byte, ending at the
     * first zero byte (if any).
     */
    private static final class AsciiBuilder extends ArrayReader {

        private final CharBuffer chars;
        private char[] data;
        private boolean terminated = false;

        private AsciiBuilder(NpyHeaderDict dict) {
            super(dict);
            this.chars = CharBuffer.allocate(elementCount);
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            if (terminated)
                return;
            // Mask to the unsigned byte value: a plain (char) cast would
            // sign-extend bytes >= 0x80 to 0xFF80..0xFFFF.
            char next = (char) (buffer.get() & 0xFF);
            if (next == 0) {
                terminated = true;
                return;
            }
            chars.put(next);
        }

        @Override
        Object getData() {
            if (chars.remaining() == 0) {
                // buffer is completely filled, its backing array is the result
                data = chars.array();
            } else {
                // string is shorter than the buffer: copy only what was written
                chars.flip();
                data = new char[chars.limit()];
                chars.get(data, 0, chars.limit());
            }
            return data;
        }
    }

    /** Decodes a string stored as one 4-byte integer per character. */
    private static final class UnicodeBuilder extends ArrayReader {

        private final int[] data;

        private UnicodeBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new int[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.getInt();
        }

        @Override
        Object getData() {
            return data;
        }
    }
}

View File

@ -0,0 +1,47 @@
package org.meteoinfo.ndarray.io.npy;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
/**
 * Reads the data section of an NPY stream from a channel into an
 * {@link NpyArray}, using a bounded intermediate buffer.
 */
class ChannelReader {

    /** Upper bound of the intermediate buffer size in bytes. */
    private static final int MAX_BUFFER_SIZE = 8 * 1024;

    private final ReadableByteChannel channel;
    private final NpyHeader header;

    private ChannelReader(ReadableByteChannel channel, NpyHeader header) {
        this.channel = channel;
        this.header = header;
    }

    /**
     * Reads the array described by the header from the channel.
     *
     * @param channel the channel, positioned at the start of the array data
     * @param header  the already parsed NPY header
     * @return the array that was read
     * @throws IOException        if reading from the channel fails
     * @throws NpyFormatException if the data type is not supported
     */
    static NpyArray<?> read(ReadableByteChannel channel, NpyHeader header)
        throws IOException, NpyFormatException {
        return new ChannelReader(channel, header).read();
    }

    private NpyArray<?> read() throws IOException, NpyFormatException {
        long totalBytes = header.dict().dataSize();
        // Small arrays get a buffer of exactly their size.
        int bufferSize = totalBytes > 0 && totalBytes < ((long) MAX_BUFFER_SIZE)
            ? (int) totalBytes
            : MAX_BUFFER_SIZE;

        NpyArrayReader builder = NpyArrayReader.of(header.dict());
        ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
        buffer.order(header.byteOrder());
        long readBytes = 0;
        while (readBytes < totalBytes) {
            int n = channel.read(buffer);
            if (n <= 0)
                break;
            buffer.flip();
            builder.readAllFrom(buffer);
            // A read may end in the middle of an element. Keep any bytes the
            // builder left unread (compact) instead of discarding them
            // (clear), so the next read can complete that element.
            buffer.compact();
            readBytes += n;
        }
        return builder.finish();
    }
}

View File

@ -0,0 +1,419 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.Array;
import org.meteoinfo.ndarray.io.npy.dict.NpyHeaderDict;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.function.BiConsumer;
public class Npy {
/**
* Load the content of the given file into NPY array instance.
*
* @param file the NPY file to read
* @return the mapped NPY array
* @throws NpyFormatException if the NPY format is invalid or unsupported
* @throws RuntimeException IO exceptions are wrapped in runtime exceptions
*/
public static Array load(String fileName) {
File file = new File(fileName);
return load(file);
}
/**
* Load the content of the given file into NPY array instance.
*
* @param file the NPY file to read
* @return the mapped NPY array
* @throws NpyFormatException if the NPY format is invalid or unsupported
* @throws RuntimeException IO exceptions are wrapped in runtime exceptions
*/
public static NpyArray<?> read(File file) {
try (RandomAccessFile f = new RandomAccessFile(file, "r");
FileChannel channel = f.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return ChannelReader.read(channel, header);
} catch (IOException e) {
throw new RuntimeException("failed to read file: " + file, e);
}
}
/**
* Load the content of the given file into NPY array instance.
*
* @param file the NPY file to read
* @return the mapped NPY array
* @throws NpyFormatException if the NPY format is invalid or unsupported
* @throws RuntimeException IO exceptions are wrapped in runtime exceptions
*/
public static Array load(File file) {
try (RandomAccessFile f = new RandomAccessFile(file, "r");
FileChannel channel = f.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return ArrayChannelReader.read(channel, header);
} catch (IOException e) {
throw new RuntimeException("failed to read file: " + file, e);
}
}
/**
* Load the content of the given file into NPY array instance.
*
* @param channel the NPY file to read
* @return the mapped NPY array
* @throws NpyFormatException if the NPY format is invalid or unsupported
* @throws RuntimeException IO exceptions are wrapped in runtime exceptions
*/
public static Array load(ReadableByteChannel channel) {
try {
NpyHeader header = NpyHeader.read(channel);
return ArrayChannelReader.read(channel, header);
} catch (IOException e) {
throw new RuntimeException("failed to read NPY array from channel", e);
}
}
public static NpyArray<?> read(ReadableByteChannel channel) {
try {
NpyHeader header = NpyHeader.read(channel);
return ChannelReader.read(channel, header);
} catch (IOException e) {
throw new RuntimeException("failed to read NPY array from channel", e);
}
}
/**
* Opens the given file as a random access file and reads the NPY header. It
* calls the given consumer with the opened file and header and closes the
* file when the consumer returns. This is useful when you want to do multiple
* operations on an NPY file, e.g. read multiple columns.
*
* @param file the NPY file
* @param fn a consumer of the opened random access file and NPY header
*/
public static void use(File file, BiConsumer<RandomAccessFile, NpyHeader> fn) {
try (RandomAccessFile raf = new RandomAccessFile(file, "r");
FileChannel channel = raf.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
fn.accept(raf, header);
} catch (IOException e) {
throw new RuntimeException("failed to use NPY file: " + file, e);
}
}
/**
* Reads a range of {@code n} elements from an array in an NPY file. Say you
* have an NPY file with an array {@code [1, 2, 3, 4]}, then
* {@code readRange(file, 2, 1)} would read {@code 2} elements starting from
* an offset of {@code 1} and thus would return {@code [2, 3]}.
*
* @param file a NPY file
* @param n the number of elements that should be read from the file
* @param offset the 0-based position of the first element of the range
* @return a one-dimensional array with {@code n} elements
*/
public static NpyArray<?> readRange(File file, int n, int offset) {
try (RandomAccessFile raf = new RandomAccessFile(file, "r");
FileChannel channel = raf.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return readRange(raf, header, n, offset);
} catch (IOException e) {
throw new RuntimeException(
"failed to read a range of " +
n + " elements from NPY file " + file, e);
}
}
/**
* Same as {@link #readRange(File, int, int)} but with an opened NPY file.
*
* @param file the opened NPY file
* @param header the NPY header of the file
* @param n the number of elements that should be read from the file
* @param offset the 0-based position of the first element of the range
* @return a one-dimensional array with {@code n} elements
*/
public static NpyArray<?> readRange(
RandomAccessFile file, NpyHeader header, int n, int offset) {
NpyHeaderDict dict = header.dict();
int elemSize = dict.dataType() == NpyDataType.U
? 4
: Math.max(dict.dataType().size(), 1);
try {
// seek to the reading position and read the data
long start = header.dataOffset();
if (offset > 0) {
start += (long) elemSize * (long) offset;
}
file.seek(start);
int byteCount = n * elemSize;
ByteBuffer buffer = ByteBuffer.allocate(byteCount)
.order(dict.byteOrder().toJava());
if (file.getChannel().read(buffer) < byteCount) {
throw new IndexOutOfBoundsException(
"failed to read " + n + " elements from file");
}
buffer.flip();
// read the range into an array
NpyHeaderDict rangeDict = shape1d(dict, n);
NpyArrayReader reader = NpyArrayReader.of(rangeDict);
reader.readAllFrom(buffer);
return reader.finish();
} catch (IOException e) {
throw new RuntimeException(
"failed to read range from NPY file: " + file, e);
}
}
public static NpyArray<?> readElements(File file, int n, int offset, int inc) {
try (RandomAccessFile raf = new RandomAccessFile(file, "r");
FileChannel channel = raf.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
return readElements(raf, header, n, offset, inc);
} catch (IOException e) {
throw new RuntimeException(
"failed to read " + n + " elements from NPY file " + file, e);
}
}
public static NpyArray<?> readElements(
RandomAccessFile file, NpyHeader header, int n, int offset, int inc) {
if (inc == 1)
return readRange(file, header, n, offset);
NpyHeaderDict dict = header.dict();
int elemSize = dict.dataType() == NpyDataType.U
? 4
: Math.max(dict.dataType().size(), 1);
try {
// read the data
ByteBuffer buffer = ByteBuffer.allocate(n * elemSize)
.order(dict.byteOrder().toJava());
long fileOffset = header.dataOffset();
if (offset > 0) {
fileOffset += (long) elemSize * (long) offset;
}
FileChannel channel = file.getChannel();
for (int i = 0; i < n; i++) {
file.seek(fileOffset + (long) i * inc * elemSize);
buffer.limit(buffer.position() + elemSize);
if (channel.read(buffer) < elemSize) {
throw new IndexOutOfBoundsException(
"failed to read " + n + " elements from file");
}
}
buffer.flip();
// read the range into an array
NpyHeaderDict rangeDict = shape1d(dict, n);
NpyArrayReader reader = NpyArrayReader.of(rangeDict);
reader.readAllFrom(buffer);
return reader.finish();
} catch (IOException e) {
throw new RuntimeException(
"failed to read elements from NPY file: " + file, e);
}
}
static NpyHeaderDict shape1d(NpyHeaderDict dict, int n) {
return NpyHeaderDict.of(dict.dataType())
.withTypeSize(dict.typeSize())
.withByteOrder(dict.byteOrder())
.withFortranOrder(dict.hasFortranOrder())
.withShape(new int[]{n})
.create();
}
/**
* Save array data to a npy file
*
* @param fileName The file path
* @param array The data array
*/
public static void save(String fileName, Array array) {
File file = new File(fileName);
save(file, array);
}
/**
* Save array data to a npy file
*
* @param file The file
* @param array The data array
*/
public static void save(File file, Array array) {
try (RandomAccessFile f = new RandomAccessFile(file, "rw");
FileChannel channel = f.getChannel()) {
save(channel, array);
} catch (IOException e) {
throw new RuntimeException("failed to write array to file " + file, e);
}
}
/**
* Save array data to a npy file
*
* @param channel The file channel
* @param array The data array
*/
public static void save(WritableByteChannel channel, Array array) {
try {
NpyDataType dataType = NpyUtil.toNpyDataType(array.getDataType());
// write the header
NpyHeaderDict dict = NpyHeaderDict.of(dataType)
.withShape(array.getShape())
.withFortranOrder(false)
.withByteOrder(NpyByteOrder.LITTLE_ENDIAN)
.create();
channel.write(ByteBuffer.wrap(dict.toNpyHeader()));
// allocate a buffer
ByteBuffer buffer = array.getDataAsByteBuffer(ByteOrder.LITTLE_ENDIAN);
channel.write(buffer);
} catch (IOException e) {
throw new RuntimeException("failed to write NPY array to channel", e);
}
}
public static void save(OutputStream outputStream, Array array) {
WritableByteChannel channel = Channels.newChannel(outputStream);
save(channel, array);
}
public static void write(File file, NpyHeaderDict dict, byte[] data) {
try (RandomAccessFile f = new RandomAccessFile(file, "rw");
FileChannel channel = f.getChannel()) {
write(channel, dict, data);
} catch (IOException e) {
throw new RuntimeException("failed to write npy data to file " + file, e);
}
}
private static void write(
WritableByteChannel channel, NpyHeaderDict dict, byte[] data) {
try {
byte[] header = dict.toNpyHeader();
channel.write(ByteBuffer.wrap(header));
channel.write(ByteBuffer.wrap(data));
} catch (Exception e) {
throw new RuntimeException("failed to write npy data", e);
}
}
public static void write(File file, NpyArray<?> array) {
try (RandomAccessFile f = new RandomAccessFile(file, "rw");
FileChannel channel = f.getChannel()) {
write(channel, array);
} catch (IOException e) {
throw new RuntimeException("failed to write array to file " + file, e);
}
}
public static void write(WritableByteChannel channel, NpyArray<?> array) {
try {
NpyDataType dataType = array.dataType();
// handle strings
if (array.isCharArray()) {
NpyCharArray charArray = array.asCharArray();
NpyHeaderDict dict = NpyHeaderDict.of(dataType)
.withByteOrder(dataType == NpyDataType.S
? NpyByteOrder.NOT_APPLICABLE
: NpyByteOrder.LITTLE_ENDIAN)
.withTypeSize(charArray.size())
.create();
byte[] bytes = charArray.asByteArray().data();
Npy.write(channel, dict, bytes);
return;
}
// write the header
NpyHeaderDict dict = NpyHeaderDict.of(dataType)
.withShape(array.shape())
.withFortranOrder(array.hasColumnOrder())
.withByteOrder(NpyByteOrder.LITTLE_ENDIAN)
.create();
channel.write(ByteBuffer.wrap(dict.toNpyHeader()));
// allocate a buffer
long totalBytes = dict.dataSize();
int maxBufferSize = 8 * 1024;
int bufferSize = totalBytes < maxBufferSize
? (int) totalBytes
: maxBufferSize;
ByteBuffer buffer = ByteBuffer.allocate(bufferSize)
.order(ByteOrder.LITTLE_ENDIAN);
// write data to the channel
for (int i = 0; i < array.size(); i++) {
array.writeElementTo(i, buffer);
if (!buffer.hasRemaining()) {
buffer.flip();
channel.write(buffer);
buffer.clear();
}
}
if (buffer.position() > 0) {
buffer.flip();
channel.write(buffer);
}
} catch (IOException e) {
throw new RuntimeException("failed to write NPY array to channel", e);
}
}
public static void write(OutputStream stream, NpyArray<?> array) {
// do not close the channel here because it would
// close the underlying output stream which is
// not the idea of this function.
WritableByteChannel channel = Channels.newChannel(stream);
write(channel, array);
}
public static NpyArray<?> memmap(File file) {
try (RandomAccessFile f = new RandomAccessFile(file, "r");
FileChannel channel = f.getChannel()) {
NpyHeader header = NpyHeader.read(channel);
long dataSize = header.dict().dataSize();
// only a buffer of size < Integer.MAX_VALUE can be mapped
// into memory. if the size of the stored array is larger
// we take the normal reader currently
long max = Integer.MAX_VALUE;
if (dataSize >= max)
return ChannelReader.read(channel, header);
java.nio.MappedByteBuffer buffer = channel.map(
FileChannel.MapMode.READ_ONLY, header.dataOffset(), dataSize);
buffer.order(header.byteOrder());
NpyArrayReader builder = NpyArrayReader.of(header.dict());
builder.readAllFrom(buffer);
return builder.finish();
} catch (IOException e) {
throw new RuntimeException("failed to memmap NPY file: " + file, e);
}
}
}

View File

@ -0,0 +1,124 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
/**
* Common interface of the typed NPY array classes of this package.
*
* @param <T> the type returned by {@link #data()}
*/
public interface NpyArray<T> {
/**
* Returns the data of this array.
*/
T data();
/**
* Returns the shape of this array.
*/
int[] shape();
/**
* Returns {@code true} when this array is stored in column-major order
* (Fortran order).
*/
boolean hasColumnOrder();
/**
* Returns {@code true} when this array is stored in row-major order (C order).
*/
default boolean hasRowOrder() {
return !hasColumnOrder();
}
/**
* Returns the NPY data type of this array.
*/
NpyDataType dataType();
/**
* Get the element by index.
*
* @param i Index
*
* @return The element
*/
Object getElement(int i);
/**
* Writes the element {@code i} of this array to the given buffer.
*
* @param i the 0-based position of the element in this array that should
* be written to the buffer; must have a value between {@code 0}
* inclusively and {@link #size()} exclusively.
* @param buffer the byte buffer to which the element should be written
*/
void writeElementTo(int i, ByteBuffer buffer);
/**
* Returns the size of this array. That is the number of elements of this
* array.
*
* @return the number of elements of this array
*/
int size();
/**
* Returns {@code false} by default; {@link NpyBigIntArray} overrides this
* to return {@code true}.
*/
default boolean isBigIntegerArray() {
return false;
}
/**
* Returns {@code false} by default; {@link NpyBooleanArray} overrides this
* to return {@code true}.
*/
default boolean isBooleanArray() {
return false;
}
/**
* Returns this array as an instance of {@link NpyBooleanArray}.
*/
NpyBooleanArray asBooleanArray();
/**
* Returns true if this array is an instance of {@link NpyByteArray}.
*/
default boolean isByteArray() {
return false;
}
/**
* Converts this array into an instance of {@link NpyByteArray}. If this
* array is already such an instance it is directly returned without copying.
* Otherwise the values of this array are casted into a new
* {@link NpyByteArray}. Note that such casting can result in data loss.
*
* @return this array as an instance of {@link NpyByteArray}
*/
NpyByteArray asByteArray();
/**
* Returns {@code false} by default; implementations that hold
* {@code double} values are expected to override this.
*/
default boolean isDoubleArray() {
return false;
}
/**
* Convert this array into a double array. If this array is already a double
* array it is directly returned without making a copy of it.
*
* @return this array if it is a double array, otherwise a converted array
*/
NpyDoubleArray asDoubleArray();
/**
* Returns {@code false} by default; implementations that hold
* {@code float} values are expected to override this.
*/
default boolean isFloatArray() {
return false;
}
/**
* Returns this array as an instance of {@link NpyFloatArray}.
*/
NpyFloatArray asFloatArray();
/**
* Returns {@code false} by default; implementations that hold
* {@code int} values are expected to override this.
*/
default boolean isIntArray() {
return false;
}
/**
* Returns this array as an instance of {@link NpyIntArray}.
*/
NpyIntArray asIntArray();
/**
* Returns {@code false} by default; implementations that hold
* {@code long} values are expected to override this.
*/
default boolean isLongArray() {
return false;
}
/**
* Returns this array as an instance of {@link NpyLongArray}.
*/
NpyLongArray asLongArray();
/**
* Returns {@code false} by default; implementations that hold
* {@code short} values are expected to override this.
*/
default boolean isShortArray() {
return false;
}
/**
* Returns this array as an instance of {@link NpyShortArray}.
*/
NpyShortArray asShortArray();
/**
* Returns {@code false} by default; {@link NpyCharArray} overrides this
* to return {@code true}.
*/
default boolean isCharArray() {
return false;
}
/**
* Returns this array as an instance of {@link NpyCharArray}. The default
* implementation first converts this array with {@link #asIntArray()} and
* then converts that integer array into a character array.
*/
default NpyCharArray asCharArray() {
return asIntArray().asCharArray();
}
}

View File

@ -0,0 +1,306 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.io.npy.dict.NpyHeaderDict;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.function.ToIntFunction;
import java.util.function.ToLongFunction;
/**
 * Base class of the readers that decode the elements of an NPY array from
 * byte buffers. A reader is created for a header dictionary via
 * {@link #of(NpyHeaderDict)}, is fed with one or more buffers, and finally
 * creates the array with {@link #finish()}.
 */
abstract class NpyArrayReader {
    protected final NpyHeaderDict dict;
    /** The number of elements this reader will decode. */
    protected final int elementCount;
    /** The number of bytes of a single stored element. */
    private final int elementSize;
    /** The index of the next element to decode. */
    private int pos;

    private NpyArrayReader(NpyHeaderDict dict) {
        this.dict = dict;
        NpyDataType type = dict.dataType();
        // for the string types the type size of the header is used as the
        // number of elements; all other types use the number of array elements
        this.elementCount = type == NpyDataType.S || type == NpyDataType.U
                ? dict.typeSize()
                : dict.numberOfElements();
        this.elementSize = type == NpyDataType.S ? 1
                : type == NpyDataType.U ? 4 : type.size();
        this.pos = 0;
    }

    /**
     * Creates the reader for the data type of the given header dictionary.
     *
     * @param dict the header dictionary of the array to read
     * @return a new reader for the data type of the dictionary
     * @throws NpyFormatException if the data type is not supported
     */
    static NpyArrayReader of(NpyHeaderDict dict) throws NpyFormatException {
        switch (dict.dataType()) {
            case bool:
                return new BooleanBuilder(dict);
            case f2:
                return new FloatBuilder(dict, NpyUtil::f2ToFloat);
            case f4:
                return new FloatBuilder(dict, ByteBuffer::getFloat);
            case f8:
                return new DoubleBuilder(dict);
            case i1:
                return new ByteBuilder(dict);
            case i2:
                return new ShortBuilder(dict, ByteBuffer::getShort);
            case i4:
                return new IntBuilder(dict, ByteBuffer::getInt);
            case i8:
                return new LongBuilder(dict, ByteBuffer::getLong);
            // unsigned types are widened to the next larger Java type
            case u1:
                return new ShortBuilder(dict, NpyUtil::u1ToShort);
            case u2:
                return new IntBuilder(dict, NpyUtil::u2ToInt);
            case u4:
                return new LongBuilder(dict, NpyUtil::u4ToLong);
            case u8:
                return new BigIntBuilder(dict);
            case S:
                return new AsciiBuilder(dict);
            case U:
                return new UnicodeBuilder(dict);
            default:
                throw new NpyFormatException(
                        "unsupported data type: " + dict.dataType());
        }
    }

    /**
     * Decodes elements from the buffer until all elements of the array were
     * read or the buffer holds fewer bytes than a single element.
     *
     * @param buffer the buffer to read from
     */
    final void readAllFrom(ByteBuffer buffer) {
        while (pos != elementCount && buffer.remaining() >= elementSize) {
            nextInto(buffer, pos);
            pos++;
        }
    }

    /**
     * Decodes exactly one element from the buffer without any checks.
     *
     * @param buffer the buffer to read from
     */
    final void readNextFrom(ByteBuffer buffer) {
        nextInto(buffer, pos);
        pos++;
    }

    /**
     * Decodes the next element from the buffer and stores it at the given
     * position.
     */
    abstract void nextInto(ByteBuffer buffer, int pos);

    /**
     * Creates the array from the elements that were read.
     */
    abstract NpyArray<?> finish();

    /** Reader for boolean arrays; every non-zero byte is {@code true}. */
    private static final class BooleanBuilder extends NpyArrayReader {
        private final boolean[] data;

        private BooleanBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new boolean[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.get() != 0;
        }

        @Override
        NpyBooleanArray finish() {
            return new NpyBooleanArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for byte arrays. */
    private static final class ByteBuilder extends NpyArrayReader {
        private final byte[] data;

        private ByteBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new byte[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.get();
        }

        @Override
        NpyByteArray finish() {
            return new NpyByteArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for double arrays. */
    private static final class DoubleBuilder extends NpyArrayReader {
        private final double[] data;

        private DoubleBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new double[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.getDouble();
        }

        @Override
        NpyDoubleArray finish() {
            return new NpyDoubleArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for float arrays; {@code fn} decodes a single element. */
    private static final class FloatBuilder extends NpyArrayReader {
        private final float[] data;
        private final ToFloatFunction<ByteBuffer> fn;

        private FloatBuilder(NpyHeaderDict dict, ToFloatFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new float[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsFloat(buffer);
        }

        @Override
        NpyFloatArray finish() {
            return new NpyFloatArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for int arrays; {@code fn} decodes a single element. */
    private static final class IntBuilder extends NpyArrayReader {
        private final int[] data;
        private final ToIntFunction<ByteBuffer> fn;

        private IntBuilder(NpyHeaderDict dict, ToIntFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new int[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsInt(buffer);
        }

        @Override
        NpyIntArray finish() {
            return new NpyIntArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for short arrays; {@code fn} decodes a single element. */
    private static final class ShortBuilder extends NpyArrayReader {
        private final short[] data;
        private final ToShortFunction<ByteBuffer> fn;

        private ShortBuilder(NpyHeaderDict dict, ToShortFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new short[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsShort(buffer);
        }

        @Override
        NpyShortArray finish() {
            return new NpyShortArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for long arrays; {@code fn} decodes a single element. */
    private static final class LongBuilder extends NpyArrayReader {
        private final long[] data;
        private final ToLongFunction<ByteBuffer> fn;

        private LongBuilder(NpyHeaderDict dict, ToLongFunction<ByteBuffer> fn) {
            super(dict);
            this.data = new long[elementCount];
            this.fn = fn;
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = fn.applyAsLong(buffer);
        }

        @Override
        NpyLongArray finish() {
            return new NpyLongArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /** Reader for {@code u8} arrays which are stored as big integers. */
    private static final class BigIntBuilder extends NpyArrayReader {
        private final BigInteger[] data;

        private BigIntBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new BigInteger[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = NpyUtil.u8ToBigInteger(buffer);
        }

        @Override
        NpyBigIntArray finish() {
            return new NpyBigIntArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /**
     * Reader for byte strings ({@code S}): one byte per character, reading
     * stops at the first NULL byte.
     */
    private static final class AsciiBuilder extends NpyArrayReader {
        private final CharBuffer chars;
        private boolean terminated = false;

        private AsciiBuilder(NpyHeaderDict dict) {
            super(dict);
            this.chars = CharBuffer.allocate(elementCount);
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            if (terminated)
                return;
            // Java bytes are signed: without the mask a byte >= 0x80 would
            // be sign-extended into a character like \uFFE9 instead of \u00E9
            char next = (char) (buffer.get() & 0xFF);
            if (next == 0) {
                terminated = true;
                return;
            }
            chars.put(next);
        }

        @Override
        NpyCharArray finish() {
            // copy without changing the buffer state so that calling this
            // method again gives the same characters
            int length = chars.position();
            char[] data;
            if (length == chars.capacity()) {
                data = chars.array();
            } else {
                data = new char[length];
                System.arraycopy(chars.array(), 0, data, 0, length);
            }
            return new NpyCharArray(dict.shape(), data, dict.hasFortranOrder());
        }
    }

    /**
     * Reader for unicode strings ({@code U}): every character is stored as a
     * 4-byte integer which is converted to characters when finishing.
     */
    private static final class UnicodeBuilder extends NpyArrayReader {
        private final int[] data;

        private UnicodeBuilder(NpyHeaderDict dict) {
            super(dict);
            this.data = new int[elementCount];
        }

        @Override
        void nextInto(ByteBuffer buffer, int pos) {
            data[pos] = buffer.getInt();
        }

        @Override
        NpyCharArray finish() {
            NpyIntArray ints = new NpyIntArray(dict.shape(), data, dict.hasFortranOrder());
            return ints.asCharArray();
        }
    }
}

View File

@ -0,0 +1,154 @@
package org.meteoinfo.ndarray.io.npy;
import java.math.BigInteger;
import java.nio.ByteBuffer;
/**
 * An NPY array of unsigned 64-bit integers ({@code u8}) which are held as
 * {@link BigInteger} values. {@code null} entries are treated as {@code 0}
 * in all conversions.
 */
public final class NpyBigIntArray extends AbstractNpyArray<BigInteger[]> {

    public NpyBigIntArray(int[] shape, BigInteger[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with the given data
     */
    public static NpyBigIntArray vectorOf(BigInteger[] data) {
        return new NpyBigIntArray(new int[] {data.length}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in row-major order (C order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyBigIntArray rowOrderOf(BigInteger[] data, int rows, int cols) {
        return new NpyBigIntArray(new int[]{rows, cols}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in column-major order (
     * Fortran order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyBigIntArray columnOrderOf(BigInteger[] data, int rows, int cols) {
        return new NpyBigIntArray(new int[]{rows, cols}, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.u8;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    /**
     * Writes the element {@code i} as 8 bytes to the buffer. Non-negative
     * values up to {@code 2^64 - 1} are written as unsigned 64-bit integers;
     * negative values that fit into a {@code long} are written in their
     * two's complement form.
     *
     * @param i the 0-based position of the element
     * @param buffer the byte buffer to which the element is written
     * @throws ArithmeticException if the value does not fit into 64 bits
     */
    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        BigInteger value = data[i];
        if (value == null) {
            buffer.putLong(0);
            return;
        }
        // longValueExact() would reject the upper half of the u8 range
        // [2^63, 2^64); these values need all 64 bits
        int maxBits = value.signum() < 0 ? 63 : 64;
        if (value.bitLength() > maxBits) {
            throw new ArithmeticException(
                    "value does not fit into 64 bits: " + value);
        }
        // longValue() returns the low-order 64 bits, which is the unsigned
        // 64-bit representation for the values accepted above
        buffer.putLong(value.longValue());
    }

    @Override
    public boolean isBigIntegerArray() {
        return true;
    }

    /**
     * Converts this array into a boolean array; every non-zero value is
     * {@code true}.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] booleans = new boolean[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                // signum() works for the full value range, longValueExact()
                // would throw for values >= 2^63
                booleans[i] = val.signum() != 0;
            }
        }
        return new NpyBooleanArray(copyShape(), booleans, fortranOrder);
    }

    /**
     * Converts this array into a byte array.
     *
     * @throws ArithmeticException if a value does not fit into an {@code int}
     */
    @Override
    public NpyByteArray asByteArray() {
        byte[] bytes = new byte[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                bytes[i] = (byte) val.intValueExact();
            }
        }
        return new NpyByteArray(copyShape(), bytes, fortranOrder);
    }

    /**
     * Converts this array into a double array.
     */
    @Override
    public NpyDoubleArray asDoubleArray() {
        double[] doubles = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                doubles[i] = val.doubleValue();
            }
        }
        return new NpyDoubleArray(copyShape(), doubles, fortranOrder);
    }

    /**
     * Converts this array into a float array.
     */
    @Override
    public NpyFloatArray asFloatArray() {
        float[] floats = new float[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                floats[i] = (float) val.doubleValue();
            }
        }
        return new NpyFloatArray(copyShape(), floats, fortranOrder);
    }

    /**
     * Converts this array into an int array.
     *
     * @throws ArithmeticException if a value does not fit into an {@code int}
     */
    @Override
    public NpyIntArray asIntArray() {
        int[] ints = new int[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                ints[i] = val.intValueExact();
            }
        }
        return new NpyIntArray(copyShape(), ints, fortranOrder);
    }

    /**
     * Converts this array into a long array.
     *
     * @throws ArithmeticException if a value does not fit into a {@code long}
     */
    @Override
    public NpyLongArray asLongArray() {
        long[] longs = new long[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                longs[i] = val.longValueExact();
            }
        }
        return new NpyLongArray(copyShape(), longs, fortranOrder);
    }

    /**
     * Converts this array into a short array.
     *
     * @throws ArithmeticException if a value does not fit into an {@code int}
     */
    @Override
    public NpyShortArray asShortArray() {
        short[] shorts = new short[data.length];
        for (int i = 0; i < data.length; i++) {
            BigInteger val = data[i];
            if (val != null) {
                shorts[i] = (short) val.intValueExact();
            }
        }
        return new NpyShortArray(copyShape(), shorts, fortranOrder);
    }
}

View File

@ -0,0 +1,138 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
/**
 * An NPY array of boolean values. In all numeric conversions {@code true}
 * becomes {@code 1} and {@code false} becomes {@code 0}.
 */
public final class NpyBooleanArray extends AbstractNpyArray<boolean[]> {

    public NpyBooleanArray(int[] shape, boolean[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with the given data
     */
    public static NpyBooleanArray vectorOf(boolean[] data) {
        int[] shape = {data.length};
        return new NpyBooleanArray(shape, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional, row-major (C order) array.
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyBooleanArray rowOrderOf(boolean[] data, int rows, int cols) {
        int[] shape = {rows, cols};
        return new NpyBooleanArray(shape, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional, column-major (Fortran order)
     * array.
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyBooleanArray columnOrderOf(boolean[] data, int rows, int cols) {
        int[] shape = {rows, cols};
        return new NpyBooleanArray(shape, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.bool;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    /**
     * Writes element {@code i} as a single byte: {@code 1} for {@code true},
     * {@code 0} for {@code false}.
     */
    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        buffer.put(data[i] ? (byte) 1 : (byte) 0);
    }

    @Override
    public boolean isBooleanArray() {
        return true;
    }

    /**
     * @return this instance, no copy is made
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        return this;
    }

    @Override
    public NpyByteArray asByteArray() {
        final byte[] result = new byte[data.length];
        for (int k = 0; k < result.length; k++) {
            result[k] = (byte) (data[k] ? 1 : 0);
        }
        return new NpyByteArray(copyShape(), result, fortranOrder);
    }

    @Override
    public NpyDoubleArray asDoubleArray() {
        final double[] result = new double[data.length];
        for (int k = 0; k < result.length; k++) {
            result[k] = data[k] ? 1d : 0d;
        }
        return new NpyDoubleArray(copyShape(), result, fortranOrder);
    }

    @Override
    public NpyFloatArray asFloatArray() {
        final float[] result = new float[data.length];
        for (int k = 0; k < result.length; k++) {
            result[k] = data[k] ? 1f : 0f;
        }
        return new NpyFloatArray(copyShape(), result, fortranOrder);
    }

    @Override
    public NpyIntArray asIntArray() {
        final int[] result = new int[data.length];
        for (int k = 0; k < result.length; k++) {
            result[k] = data[k] ? 1 : 0;
        }
        return new NpyIntArray(copyShape(), result, fortranOrder);
    }

    @Override
    public NpyLongArray asLongArray() {
        final long[] result = new long[data.length];
        for (int k = 0; k < result.length; k++) {
            result[k] = data[k] ? 1L : 0L;
        }
        return new NpyLongArray(copyShape(), result, fortranOrder);
    }

    @Override
    public NpyShortArray asShortArray() {
        final short[] result = new short[data.length];
        for (int k = 0; k < result.length; k++) {
            result[k] = (short) (data[k] ? 1 : 0);
        }
        return new NpyShortArray(copyShape(), result, fortranOrder);
    }
}

View File

@ -0,0 +1,124 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
public final class NpyByteArray extends AbstractNpyArray<byte[]> {
public NpyByteArray(int[] shape, byte[] data, boolean fortranOrder) {
super(shape, data, fortranOrder);
}
public static NpyByteArray vectorOf(byte[] data) {
return new NpyByteArray(new int[] {data.length}, data, false);
}
/**
* Wraps the given data in a 2-dimensional array in row-major order (C order).
*
* @param data the data of the array
* @param rows the number of rows of the array
* @param cols the number of columns of the array
* @return a 2d array of the given shape
*/
public static NpyByteArray rowOrderOf(byte[] data, int rows, int cols) {
return new NpyByteArray(new int[]{rows, cols}, data, false);
}
/**
* Wraps the given data in a 2-dimensional array in column-major order (
* Fortran order).
*
* @param data the data of the array
* @param rows the number of rows of the array
* @param cols the number of columns of the array
* @return a 2d array of the given shape
*/
public static NpyByteArray columnOrderOf(byte[] data, int rows, int cols) {
return new NpyByteArray(new int[]{rows, cols}, data, true);
}
@Override
public NpyDataType dataType() {
return NpyDataType.i1;
}
@Override
public int size() {
return data.length;
}
@Override
public Object getElement(int i) {
return data[i];
}
@Override
public void writeElementTo(int i, ByteBuffer buffer) {
buffer.put(data[i]);
}
@Override
public boolean isByteArray() {
return true;
}
@Override
public NpyByteArray asByteArray() {
return this;
}
@Override
public NpyBooleanArray asBooleanArray() {
boolean[] booleans = new boolean[data.length];
for (int i = 0; i < data.length; i++) {
booleans[i] = i != 0;
}
return new NpyBooleanArray(copyShape(), booleans, fortranOrder);
}
@Override
public NpyDoubleArray asDoubleArray() {
double[] doubles = new double[data.length];
for (int i = 0; i < data.length; i++) {
doubles[i] = data[i];
}
return new NpyDoubleArray(copyShape(), doubles, fortranOrder);
}
@Override
public NpyFloatArray asFloatArray() {
float[] floats = new float[data.length];
for (int i = 0; i < data.length; i++) {
floats[i] = data[i];
}
return new NpyFloatArray(copyShape(), floats, fortranOrder);
}
@Override
public NpyIntArray asIntArray() {
int[] ints = new int[data.length];
for (int i = 0; i < data.length; i++) {
ints[i] = data[i];
}
return new NpyIntArray(copyShape(), ints, fortranOrder);
}
@Override
public NpyLongArray asLongArray() {
long[] longs = new long[data.length];
for (int i = 0; i < data.length; i++) {
longs[i] = data[i];
}
return new NpyLongArray(copyShape(), longs, fortranOrder);
}
@Override
public NpyShortArray asShortArray() {
short[] shorts = new short[data.length];
for (int i = 0; i < data.length; i++) {
shorts[i] = data[i];
}
return new NpyShortArray(copyShape(), shorts, fortranOrder);
}
}

View File

@ -0,0 +1,71 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteOrder;
/**
 * The byte order markers that can prefix a data type description of an NPY
 * header, e.g. the {@code <} in {@code <i4}.
 */
public enum NpyByteOrder {

    HARDWARE_NATIVE('='),
    LITTLE_ENDIAN('<'),
    BIG_ENDIAN('>'),
    NOT_APPLICABLE('|');

    private final char symbol;

    NpyByteOrder(char symbol) {
        this.symbol = symbol;
    }

    /**
     * @return the character that marks this byte order in a type description
     */
    public char symbol() {
        return symbol;
    }

    /**
     * Detects the byte order from the first non-whitespace character of a
     * data type description:
     * <ul>
     * <li>{@code =} hardware native</li>
     * <li>{@code <} little-endian</li>
     * <li>{@code >} big-endian</li>
     * <li>{@code |} not applicable</li>
     * </ul>
     *
     * @param description the data type description, eg. {@code <i4}
     * @return the detected byte order; {@link #HARDWARE_NATIVE} when the
     * description is {@code null}, blank, or starts with another character
     */
    public static NpyByteOrder of(String description) {
        if (description != null) {
            String text = description.trim();
            if (!text.isEmpty()) {
                char marker = text.charAt(0);
                for (NpyByteOrder order : values()) {
                    if (order.symbol == marker) {
                        return order;
                    }
                }
            }
        }
        return HARDWARE_NATIVE;
    }

    /**
     * Returns the corresponding Java {@link ByteOrder}; the native order of
     * the platform is returned for everything that is not explicitly big- or
     * little-endian.
     */
    public ByteOrder toJava() {
        if (this == BIG_ENDIAN) {
            return ByteOrder.BIG_ENDIAN;
        }
        if (this == LITTLE_ENDIAN) {
            return ByteOrder.LITTLE_ENDIAN;
        }
        return ByteOrder.nativeOrder();
    }

    @Override
    public String toString() {
        return Character.toString(symbol);
    }
}

View File

@ -0,0 +1,152 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.StandardCharsets;
/**
 * An NPY array backed by a {@code char[]}. The NPY data type is derived from
 * the content: {@link NpyDataType#S} when all characters can be encoded in
 * US-ASCII, {@link NpyDataType#U} otherwise.
 */
public class NpyCharArray extends AbstractNpyArray<char[]> {

    // determined on the first call of dataType() and cached afterwards
    private NpyDataType type;

    public NpyCharArray(int[] shape, char[] data) {
        super(shape, data, false);
    }

    public NpyCharArray(int[] shape, char[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the characters of the given string in an array with an empty
     * shape.
     *
     * @param s the string to wrap
     * @return a character array with the characters of {@code s}
     */
    public static NpyCharArray of(String s) {
        return new NpyCharArray(new int[0], s.toCharArray());
    }

    /**
     * Returns {@link NpyDataType#S} if the data can be encoded in US-ASCII,
     * otherwise {@link NpyDataType#U}. The result is computed once and then
     * cached.
     */
    @Override
    public NpyDataType dataType() {
        if (type == null) {
            boolean asciiOnly = StandardCharsets.US_ASCII
                .newEncoder()
                .canEncode(CharBuffer.wrap(data));
            type = asciiOnly ? NpyDataType.S : NpyDataType.U;
        }
        return type;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    /**
     * Writes the character at position {@code i} to the buffer: as a single
     * byte for {@link NpyDataType#S}, as a 4-byte integer otherwise.
     * NOTE(review): for the non-ASCII case the UTF-16 code unit is written,
     * so a surrogate pair ends up as two separate values; confirm that this
     * is what callers expect.
     */
    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        if (dataType() != NpyDataType.S) {
            buffer.putInt(data[i]);
            return;
        }
        buffer.put((byte) data[i]);
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public boolean isCharArray() {
        return true;
    }

    /**
     * Maps every character to {@code true} unless it is the NUL character.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] flags = new boolean[data.length];
        int k = 0;
        for (char c : data) {
            flags[k++] = c != 0;
        }
        return new NpyBooleanArray(copyShape(), flags, fortranOrder);
    }

    /**
     * Converts this character array into a byte array. If the characters in
     * this array can be encoded in ASCII, a NULL-terminated byte array will be
     * returned. Otherwise, every {@code char} is written as a 4-byte integer
     * in little-endian order.
     *
     * @return the NPY byte-array representation of this character array
     */
    @Override
    public NpyByteArray asByteArray() {
        if (dataType() == NpyDataType.S) {
            // one extra slot that keeps its default value 0 as terminator
            byte[] ascii = new byte[data.length + 1];
            int k = 0;
            for (char c : data) {
                ascii[k++] = (byte) c;
            }
            return NpyByteArray.vectorOf(ascii);
        }

        byte[] wide = new byte[data.length * 4];
        ByteBuffer out = ByteBuffer.wrap(wide);
        out.order(ByteOrder.LITTLE_ENDIAN);
        for (int k = 0; k < data.length; k++) {
            out.putInt(data[k]);
        }
        return NpyByteArray.vectorOf(wide);
    }

    @Override
    public NpyCharArray asCharArray() {
        return this;
    }

    @Override
    public NpyDoubleArray asDoubleArray() {
        return asIntArray().asDoubleArray();
    }

    @Override
    public NpyFloatArray asFloatArray() {
        return asIntArray().asFloatArray();
    }

    /**
     * Converts the characters into their Unicode code points. Surrogate pairs
     * are combined into a single code point, so the result can have fewer
     * elements than this array has characters.
     */
    @Override
    public NpyIntArray asIntArray() {
        int[] codePoints = new int[data.length];
        int count = 0;
        for (int pos = 0; pos < data.length; ) {
            int codePoint = Character.codePointAt(data, pos);
            codePoints[count++] = codePoint;
            pos += Character.charCount(codePoint);
        }
        if (count != data.length) {
            // at least one surrogate pair was merged: trim to the used part
            int[] exact = new int[count];
            System.arraycopy(codePoints, 0, exact, 0, count);
            codePoints = exact;
        }
        return new NpyIntArray(copyShape(), codePoints, fortranOrder);
    }

    @Override
    public NpyLongArray asLongArray() {
        return asIntArray().asLongArray();
    }

    @Override
    public NpyShortArray asShortArray() {
        return asIntArray().asShortArray();
    }

    /**
     * Returns the characters of this array as a string.
     */
    @Override
    public String toString() {
        return new String(data);
    }
}

View File

@ -0,0 +1,293 @@
package org.meteoinfo.ndarray.io.npy;
import java.util.EnumSet;
/**
* An enumeration of supported NumPy data types.
*
* @see <a href="https://numpy.org/doc/stable/reference/arrays.dtypes.html">
* https://numpy.org/doc/stable/reference/arrays.dtypes.html</a>
*/
public enum NpyDataType {

    /**
     * Booleans
     */
    bool("?", 1, new String[]{
        "b1",
        "bool",
        "bool8",
        "bool_",}),

    /**
     * 16 bit floating point numbers
     */
    f2("f2", 2, new String[]{
        "e",
        "float16",
        "half",
    }),

    /**
     * 32 bit floating point numbers
     */
    f4("f4", 4, new String[]{
        "f",
        "float32",
        "single",
    }),

    /**
     * 64 bit floating point numbers
     */
    f8("f8", 8, new String[]{
        "d",
        "double",
        "float",
        "float64",
        "float_",
        "g",
        "longdouble",
        "longfloat",
    }),

    /**
     * 8-bit signed integers
     */
    i1("b", 1, new String[]{
        "byte",
        "i1",
        "int8"}),

    /**
     * 16 bit signed integers
     */
    i2("i2", 2, new String[]{
        "h",
        "int16",
        "short",
    }),

    /**
     * 32 bit signed integers
     */
    i4("i4", 4, new String[]{
        "i",
        "int",
        "int32",
        "int_",
        "intc",
        "l",
        "long",
    }),

    /**
     * 64 bit signed integers
     */
    i8("i8", 8, new String[]{
        "int0",
        "int64",
        "intp",
        "longlong",
        "p",
        "q",
    }),

    /**
     * 8-bit unsigned integers
     */
    u1("B", 1, new String[]{
        "u1",
        "ubyte",
        "uint8",
    }),

    /**
     * 16 bit unsigned integers
     */
    u2("u2", 2, new String[]{
        "H",
        "uint16",
        "ushort",
    }),

    /**
     * 32 bit unsigned integers
     */
    u4("u4", 4, new String[]{
        "I",
        "L",
        "uint",
        "uint32",
        "uintc",
    }),

    /**
     * 64 bit unsigned integers
     */
    u8("u8", 8, new String[]{
        "P",
        "Q",
        "Uint64",
        "uint0",
        "uint64",
        "uintp",
        "ulonglong",
    }),

    /**
     * Byte strings; the size is reported as {@code 0} (variable)
     */
    S("S", 0, new String[]{
        "Bytes0",
        "a",
        "bytes",
        "bytes0",
        "bytes_",
        "string_",
    }),

    /**
     * Unicode strings; the size is reported as {@code 0} (variable)
     */
    U("U", 0, new String[]{
        "Str0",
        "str",
        "str0",
        "str_",
        "unicode",
        "unicode_",
    });

    private final String symbol;
    private final int size;
    private final String[] synonyms;

    NpyDataType(String symbol, int size, String[] synonyms) {
        this.symbol = symbol;
        this.size = size;
        this.synonyms = synonyms;
    }

    /**
     * Returns the NPY symbol of this data type, same as {@link #symbol()}.
     */
    @Override
    public String toString() {
        return symbol;
    }

    /**
     * Get the NPY symbol of the data type.
     *
     * @return the NPY symbol of the data type, e.g. {@code i4}.
     */
    public String symbol() {
        return symbol;
    }

    /**
     * For fixed sized data types, returns the number of bytes that are required
     * to store a value of this type. Returns {@code 0} for variable sized types
     * (the string types {@link #S} and {@link #U}). This method is similar to
     * the NumPy method {@code numpy.dtype.itemsize}.
     *
     * <pre>{@code
     * >>> import numpy as np
     * >>> np.dtype('i4').itemsize
     * 4
     * >>> np.dtype('U').itemsize
     * 0
     * }</pre>
     *
     * @return for fixed sized types the number of bytes to store a single value,
     * for variable sized types like strings {@code 0}.
     */
    public int size() {
        return size;
    }

    /**
     * Tries to find the data type for the given symbol. The behavior is a bit
     * like {@code numpy.dtype('[the type name or symbol]')}. The given symbol
     * can have a prefix that indicates the byte order.
     *
     * @param dtype the data type symbol (e.g. {@code i4, int32, <i4})
     * @return the data type or {@code null} if there is no matching type defined.
     */
    public static NpyDataType of(String dtype) {
        if (dtype == null || dtype.isEmpty()) {
            return null;
        }

        // drop an optional leading byte-order mark
        String name = "<>=|".indexOf(dtype.charAt(0)) >= 0
            ? dtype.substring(1)
            : dtype;

        NpyDataType exact = firstMatch(EnumSet.allOf(NpyDataType.class), name);
        if (exact != null) {
            return exact;
        }

        // for string types the variable length of the string is part of the
        // symbol (e.g. U5), so only the part before the first digit is matched
        int digitAt = indexOfFirstDigit(name);
        if (digitAt == 0) {
            return null;
        }
        String prefix = digitAt > 0
            ? name.substring(0, digitAt)
            : name;
        return firstMatch(EnumSet.of(S, U), prefix);
    }

    /**
     * Returns the first of the given types whose symbol or one of whose
     * synonyms equals the given name, or {@code null} if none matches.
     */
    private static NpyDataType firstMatch(
        Iterable<NpyDataType> candidates, String name) {
        for (NpyDataType candidate : candidates) {
            if (candidate.symbol.equals(name)) {
                return candidate;
            }
            for (String synonym : candidate.synonyms) {
                if (synonym.equals(name)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Returns the index of the first digit in the given text, or {@code -1}
     * if it contains no digit.
     */
    private static int indexOfFirstDigit(String text) {
        int length = text.length();
        for (int pos = 0; pos < length; pos++) {
            if (Character.isDigit(text.charAt(pos))) {
                return pos;
            }
        }
        return -1;
    }

    /**
     * Get the byte order of the given data type description. It tries to identify
     * it from the first character of that type description:
     *
     * <ul>
     * <li>{@code =} hardware native</li>
     * <li>{@code <} little-endian</li>
     * <li>{@code >} big-endian</li>
     * <li>{@code |} not applicable</li>
     * </ul>
     *
     * @param dtype the data type description (e.g. {@code <i4})
     * @return the identified byte-order or {@link NpyByteOrder#NOT_APPLICABLE} if
     * it is not specified in the given type string
     * @see <a href="https://numpy.org/doc/stable/reference/generated/numpy.dtype.byteorder.html">
     * https://numpy.org/doc/stable/reference/generated/numpy.dtype.byteorder.html</a>
     */
    public static NpyByteOrder byteOrderOf(String dtype) {
        if (dtype == null || dtype.isEmpty()) {
            return NpyByteOrder.NOT_APPLICABLE;
        }
        char mark = dtype.charAt(0);
        if (mark == '=') {
            return NpyByteOrder.HARDWARE_NATIVE;
        }
        if (mark == '>') {
            return NpyByteOrder.BIG_ENDIAN;
        }
        if (mark == '<') {
            return NpyByteOrder.LITTLE_ENDIAN;
        }
        return NpyByteOrder.NOT_APPLICABLE;
    }
}

View File

@ -0,0 +1,123 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
/**
 * An NPY array of 64 bit floating point numbers, backed by a
 * {@code double[]}.
 */
public final class NpyDoubleArray extends AbstractNpyArray<double[]> {

    public NpyDoubleArray(int[] shape, double[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with {@code data.length} elements
     */
    public static NpyDoubleArray vectorOf(double[] data) {
        int[] shape = {data.length};
        return new NpyDoubleArray(shape, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in row-major order (C order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyDoubleArray rowOrderOf(double[] data, int rows, int cols) {
        int[] shape = {rows, cols};
        return new NpyDoubleArray(shape, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in column-major order (
     * Fortran order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyDoubleArray columnOrderOf(double[] data, int rows, int cols) {
        int[] shape = {rows, cols};
        return new NpyDoubleArray(shape, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.f8;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        buffer.putDouble(data[i]);
    }

    @Override
    public boolean isDoubleArray() {
        return true;
    }

    @Override
    public NpyDoubleArray asDoubleArray() {
        return this;
    }

    /**
     * Maps every element to {@code true} unless it compares equal to zero.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] flags = new boolean[data.length];
        int k = 0;
        for (double value : data) {
            flags[k++] = value != 0;
        }
        return new NpyBooleanArray(copyShape(), flags, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (byte)} cast.
     */
    @Override
    public NpyByteArray asByteArray() {
        byte[] narrowed = new byte[data.length];
        int k = 0;
        for (double value : data) {
            narrowed[k++] = (byte) value;
        }
        return new NpyByteArray(copyShape(), narrowed, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (float)} cast.
     */
    @Override
    public NpyFloatArray asFloatArray() {
        float[] narrowed = new float[data.length];
        int k = 0;
        for (double value : data) {
            narrowed[k++] = (float) value;
        }
        return new NpyFloatArray(copyShape(), narrowed, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (int)} cast.
     */
    @Override
    public NpyIntArray asIntArray() {
        int[] narrowed = new int[data.length];
        int k = 0;
        for (double value : data) {
            narrowed[k++] = (int) value;
        }
        return new NpyIntArray(copyShape(), narrowed, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (long)} cast.
     */
    @Override
    public NpyLongArray asLongArray() {
        long[] narrowed = new long[data.length];
        int k = 0;
        for (double value : data) {
            narrowed[k++] = (long) value;
        }
        return new NpyLongArray(copyShape(), narrowed, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (short)} cast.
     */
    @Override
    public NpyShortArray asShortArray() {
        short[] narrowed = new short[data.length];
        int k = 0;
        for (double value : data) {
            narrowed[k++] = (short) value;
        }
        return new NpyShortArray(copyShape(), narrowed, fortranOrder);
    }
}

View File

@ -0,0 +1,123 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
/**
 * An NPY array of 32 bit floating point numbers, backed by a
 * {@code float[]}.
 */
public final class NpyFloatArray extends AbstractNpyArray<float[]> {

    public NpyFloatArray(int[] shape, float[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with {@code data.length} elements
     */
    public static NpyFloatArray vectorOf(float[] data) {
        return new NpyFloatArray(new int[]{data.length}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in row-major order (C order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyFloatArray rowOrderOf(float[] data, int rows, int cols) {
        return new NpyFloatArray(new int[]{rows, cols}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in column-major order (
     * Fortran order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyFloatArray columnOrderOf(float[] data, int rows, int cols) {
        return new NpyFloatArray(new int[]{rows, cols}, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.f4;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        buffer.putFloat(data[i]);
    }

    @Override
    public boolean isFloatArray() {
        return true;
    }

    /**
     * Maps every element to {@code true} unless it compares equal to zero.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] booleans = new boolean[data.length];
        for (int i = 0; i < data.length; i++) {
            // test the element value, not the loop index
            booleans[i] = data[i] != 0;
        }
        return new NpyBooleanArray(copyShape(), booleans, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (byte)} cast.
     */
    @Override
    public NpyByteArray asByteArray() {
        byte[] bytes = new byte[data.length];
        for (int i = 0; i < data.length; i++) {
            bytes[i] = (byte) data[i];
        }
        return new NpyByteArray(copyShape(), bytes, fortranOrder);
    }

    /**
     * Widens every element to {@code double}.
     */
    @Override
    public NpyDoubleArray asDoubleArray() {
        double[] doubles = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            doubles[i] = data[i];
        }
        return new NpyDoubleArray(copyShape(), doubles, fortranOrder);
    }

    @Override
    public NpyFloatArray asFloatArray() {
        return this;
    }

    /**
     * Converts every element with a narrowing {@code (int)} cast.
     */
    @Override
    public NpyIntArray asIntArray() {
        int[] ints = new int[data.length];
        for (int i = 0; i < data.length; i++) {
            ints[i] = (int) data[i];
        }
        return new NpyIntArray(copyShape(), ints, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (long)} cast.
     */
    @Override
    public NpyLongArray asLongArray() {
        long[] longs = new long[data.length];
        for (int i = 0; i < data.length; i++) {
            longs[i] = (long) data[i];
        }
        return new NpyLongArray(copyShape(), longs, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (short)} cast.
     */
    @Override
    public NpyShortArray asShortArray() {
        short[] shorts = new short[data.length];
        for (int i = 0; i < data.length; i++) {
            shorts[i] = (short) data[i];
        }
        return new NpyShortArray(copyShape(), shorts, fortranOrder);
    }
}

View File

@ -0,0 +1,9 @@
package org.meteoinfo.ndarray.io.npy;
/**
 * Unchecked exception that is thrown when NPY data do not have the expected
 * format or use a feature that is not supported.
 */
public class NpyFormatException extends RuntimeException {

    /**
     * Creates the exception with the given detail message.
     *
     * @param message the detail message, available via {@link #getMessage()}
     */
    public NpyFormatException(final String message) {
        super(message);
    }
}

View File

@ -0,0 +1,126 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.io.npy.dict.NpyHeaderDict;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.ReadableByteChannel;
import java.util.Objects;
/**
* The NPY header contains the metadata of the stored array and the NPY file.
*/
public final class NpyHeader {

    private final long dataOffset;
    private final NpyHeaderDict dict;

    private NpyHeader(long dataOffset, NpyHeaderDict dict) {
        this.dataOffset = dataOffset;
        this.dict = Objects.requireNonNull(dict);
    }

    @Override
    public String toString() {
        return dict.toString();
    }

    /**
     * Returns the dictionary entries of this header.
     */
    public NpyHeaderDict dict() {
        return dict;
    }

    /**
     * Returns the 0-based position from where the data start in the NPY file.
     */
    public long dataOffset() {
        return dataOffset;
    }

    /**
     * Returns the Java byte order for the byte order of the header
     * dictionary, or the native byte order if the dictionary has none.
     */
    public ByteOrder byteOrder() {
        return dict.byteOrder() == null
            ? ByteOrder.nativeOrder()
            : dict.byteOrder().toJava();
    }

    /**
     * Reads the NPY header from the given stream. The stream is read up to the
     * end of the header and is not closed.
     *
     * @param in the stream, positioned at the first byte of the NPY data
     * @return the parsed header
     * @throws IOException        if reading from the stream fails
     * @throws NpyFormatException if the stream ends before the header is
     *                            complete or the header is invalid
     */
    public static NpyHeader read(InputStream in)
        throws IOException, NpyFormatException {

        // read the version
        byte[] bytes = new byte[8];
        if (!readFully(in, bytes))
            throw new NpyFormatException("invalid NPY header");
        NpyVersion version = NpyVersion.of(bytes);

        // read the header length; 2 bytes for version 1; 4 bytes for versions > 1
        int headerLength;
        long dataOffset;
        if (version.major == 1) {
            bytes = new byte[2];
            if (!readFully(in, bytes))
                throw new NpyFormatException("invalid NPY header");
            headerLength = NpyUtil.u2ToInt(bytes, ByteOrder.LITTLE_ENDIAN);
            // 8 bytes magic + version, 2 bytes header length
            dataOffset = 10 + headerLength;
        } else {
            bytes = new byte[4];
            if (!readFully(in, bytes))
                throw new NpyFormatException("invalid NPY header");
            long len = NpyUtil.u4ToLong(bytes, ByteOrder.LITTLE_ENDIAN);
            // 8 bytes magic + version, 4 bytes header length
            dataOffset = 12 + len;
            headerLength = checkedHeaderLength(len);
        }

        // read the header string
        bytes = new byte[headerLength];
        if (!readFully(in, bytes))
            throw new NpyFormatException("invalid NPY file");
        String header = new String(bytes, version.headerEncoding());
        return new NpyHeader(dataOffset, NpyHeaderDict.parse(header));
    }

    /**
     * Reads the NPY header from the given channel. The channel is read up to
     * the end of the header and is not closed.
     *
     * @param channel the channel, positioned at the first byte of the NPY data
     * @return the parsed header
     * @throws IOException        if reading from the channel fails
     * @throws NpyFormatException if the channel ends before the header is
     *                            complete or the header is invalid
     */
    public static NpyHeader read(ReadableByteChannel channel) throws IOException {

        // read the version
        ByteBuffer buffer = ByteBuffer.allocate(8)
            .order(ByteOrder.LITTLE_ENDIAN);
        if (!readFully(channel, buffer))
            throw new NpyFormatException("invalid NPY header");
        NpyVersion version = NpyVersion.of(buffer.array());

        // re-use the buffer for the header length; clear() keeps the byte order
        int headerLength;
        long dataOffset;
        buffer.clear();
        if (version.major == 1) {
            buffer.limit(2);
            if (!readFully(channel, buffer))
                throw new NpyFormatException("invalid NPY header");
            buffer.flip();
            headerLength = NpyUtil.u2ToInt(buffer);
            dataOffset = 10 + headerLength;
        } else {
            buffer.limit(4);
            if (!readFully(channel, buffer))
                throw new NpyFormatException("invalid NPY header");
            // the buffer must be flipped before the value can be read from it
            buffer.flip();
            long len = NpyUtil.u4ToLong(buffer);
            dataOffset = 12 + len;
            headerLength = checkedHeaderLength(len);
        }

        // read and parse the header
        buffer = ByteBuffer.allocate(headerLength);
        if (!readFully(channel, buffer))
            throw new NpyFormatException("invalid NPY file");
        String header = new String(buffer.array(), version.headerEncoding());
        return new NpyHeader(dataOffset, NpyHeaderDict.parse(header));
    }

    /**
     * Converts an unsigned 32 bit header length to an {@code int} and rejects
     * values that cannot be used as an array length.
     */
    private static int checkedHeaderLength(long len) {
        if (len > Integer.MAX_VALUE)
            throw new NpyFormatException(
                "invalid NPY header: header length too large: " + len);
        return (int) len;
    }

    /**
     * Fills the given array completely from the stream; a single read call may
     * deliver fewer bytes than requested, so it reads until the array is full.
     *
     * @return {@code false} if the stream ended before the array was full
     */
    private static boolean readFully(InputStream in, byte[] target)
        throws IOException {
        int filled = 0;
        while (filled < target.length) {
            int n = in.read(target, filled, target.length - filled);
            if (n < 0)
                return false;
            filled += n;
        }
        return true;
    }

    /**
     * Reads from the channel until the given buffer has no remaining space.
     * Intended for blocking channels; a channel that keeps returning 0 bytes
     * would be polled repeatedly.
     *
     * @return {@code false} if the channel ended before the buffer was full
     */
    private static boolean readFully(ReadableByteChannel channel, ByteBuffer target)
        throws IOException {
        while (target.hasRemaining()) {
            if (channel.read(target) < 0)
                return false;
        }
        return true;
    }
}

View File

@ -0,0 +1,167 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
/**
 * An NPY array of 32 bit signed integers, backed by an {@code int[]}.
 */
public final class NpyIntArray extends AbstractNpyArray<int[]> {

    public NpyIntArray(int[] shape, int[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with {@code data.length} elements
     */
    public static NpyIntArray vectorOf(int[] data) {
        int[] shape = {data.length};
        return new NpyIntArray(shape, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in row-major order (C order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyIntArray rowOrderOf(int[] data, int rows, int cols) {
        int[] shape = {rows, cols};
        return new NpyIntArray(shape, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in column-major order (
     * Fortran order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyIntArray columnOrderOf(int[] data, int rows, int cols) {
        int[] shape = {rows, cols};
        return new NpyIntArray(shape, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.i4;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        buffer.putInt(data[i]);
    }

    @Override
    public boolean isIntArray() {
        return true;
    }

    @Override
    public NpyIntArray asIntArray() {
        return this;
    }

    /**
     * Maps every element to {@code true} unless it is zero.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] flags = new boolean[data.length];
        int k = 0;
        for (int value : data) {
            flags[k++] = value != 0;
        }
        return new NpyBooleanArray(copyShape(), flags, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (byte)} cast.
     */
    @Override
    public NpyByteArray asByteArray() {
        byte[] narrowed = new byte[data.length];
        int k = 0;
        for (int value : data) {
            narrowed[k++] = (byte) value;
        }
        return new NpyByteArray(copyShape(), narrowed, fortranOrder);
    }

    /**
     * Interprets every element as a Unicode code point and converts it to its
     * UTF-16 representation. Because a code point can result in two
     * characters, the returned array can hold more characters than this array
     * has elements.
     *
     * @throws IllegalArgumentException if an element is not a valid Unicode
     *                                  code point
     */
    @Override
    public NpyCharArray asCharArray() {
        StringBuilder text = new StringBuilder(Math.max(data.length, 10));
        for (int codePoint : data) {
            text.append(Character.toChars(codePoint));
        }
        char[] chars = new char[text.length()];
        text.getChars(0, chars.length, chars, 0);
        return new NpyCharArray(copyShape(), chars, fortranOrder);
    }

    /**
     * Widens every element to {@code double}.
     */
    @Override
    public NpyDoubleArray asDoubleArray() {
        double[] widened = new double[data.length];
        int k = 0;
        for (int value : data) {
            widened[k++] = value;
        }
        return new NpyDoubleArray(copyShape(), widened, fortranOrder);
    }

    /**
     * Converts every element with a {@code (float)} cast.
     */
    @Override
    public NpyFloatArray asFloatArray() {
        float[] converted = new float[data.length];
        int k = 0;
        for (int value : data) {
            converted[k++] = (float) value;
        }
        return new NpyFloatArray(copyShape(), converted, fortranOrder);
    }

    /**
     * Widens every element to {@code long}.
     */
    @Override
    public NpyLongArray asLongArray() {
        long[] widened = new long[data.length];
        int k = 0;
        for (int value : data) {
            widened[k++] = value;
        }
        return new NpyLongArray(copyShape(), widened, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (short)} cast.
     */
    @Override
    public NpyShortArray asShortArray() {
        short[] narrowed = new short[data.length];
        int k = 0;
        for (int value : data) {
            narrowed[k++] = (short) value;
        }
        return new NpyShortArray(copyShape(), narrowed, fortranOrder);
    }
}

View File

@ -0,0 +1,124 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
/**
 * An NPY array of 64 bit signed integers, backed by a {@code long[]}.
 */
public final class NpyLongArray extends AbstractNpyArray<long[]> {

    public NpyLongArray(int[] shape, long[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with {@code data.length} elements
     */
    public static NpyLongArray vectorOf(long[] data) {
        return new NpyLongArray(new int[]{data.length}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in row-major order (C order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyLongArray rowOrderOf(long[] data, int rows, int cols) {
        return new NpyLongArray(new int[]{rows, cols}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in column-major order (
     * Fortran order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyLongArray columnOrderOf(long[] data, int rows, int cols) {
        return new NpyLongArray(new int[]{rows, cols}, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.i8;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        buffer.putLong(data[i]);
    }

    @Override
    public boolean isLongArray() {
        return true;
    }

    @Override
    public NpyLongArray asLongArray() {
        return this;
    }

    /**
     * Maps every element to {@code true} unless it is zero.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] booleans = new boolean[data.length];
        for (int i = 0; i < data.length; i++) {
            // test the element value, not the loop index
            booleans[i] = data[i] != 0;
        }
        return new NpyBooleanArray(copyShape(), booleans, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (byte)} cast.
     */
    @Override
    public NpyByteArray asByteArray() {
        byte[] bytes = new byte[data.length];
        for (int i = 0; i < data.length; i++) {
            bytes[i] = (byte) data[i];
        }
        return new NpyByteArray(copyShape(), bytes, fortranOrder);
    }

    /**
     * Converts every element to {@code double}.
     */
    @Override
    public NpyDoubleArray asDoubleArray() {
        double[] doubles = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            doubles[i] = data[i];
        }
        return new NpyDoubleArray(copyShape(), doubles, fortranOrder);
    }

    /**
     * Converts every element with a {@code (float)} cast.
     */
    @Override
    public NpyFloatArray asFloatArray() {
        float[] floats = new float[data.length];
        for (int i = 0; i < data.length; i++) {
            floats[i] = (float) data[i];
        }
        return new NpyFloatArray(copyShape(), floats, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (int)} cast.
     */
    @Override
    public NpyIntArray asIntArray() {
        int[] ints = new int[data.length];
        for (int i = 0; i < data.length; i++) {
            ints[i] = (int) data[i];
        }
        return new NpyIntArray(copyShape(), ints, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (short)} cast.
     */
    @Override
    public NpyShortArray asShortArray() {
        short[] shorts = new short[data.length];
        for (int i = 0; i < data.length; i++) {
            shorts[i] = (short) data[i];
        }
        return new NpyShortArray(copyShape(), shorts, fortranOrder);
    }
}

View File

@ -0,0 +1,124 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.ByteBuffer;
/**
 * An NPY array of 16 bit signed integers, backed by a {@code short[]}.
 */
public final class NpyShortArray extends AbstractNpyArray<short[]> {

    public NpyShortArray(int[] shape, short[] data, boolean fortranOrder) {
        super(shape, data, fortranOrder);
    }

    /**
     * Wraps the given data in a 1-dimensional array.
     *
     * @param data the data of the array
     * @return a vector with {@code data.length} elements
     */
    public static NpyShortArray vectorOf(short[] data) {
        return new NpyShortArray(new int[]{data.length}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in row-major order (C order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyShortArray rowOrderOf(short[] data, int rows, int cols) {
        return new NpyShortArray(new int[]{rows, cols}, data, false);
    }

    /**
     * Wraps the given data in a 2-dimensional array in column-major order (
     * Fortran order).
     *
     * @param data the data of the array
     * @param rows the number of rows of the array
     * @param cols the number of columns of the array
     * @return a 2d array of the given shape
     */
    public static NpyShortArray columnOrderOf(short[] data, int rows, int cols) {
        return new NpyShortArray(new int[]{rows, cols}, data, true);
    }

    @Override
    public NpyDataType dataType() {
        return NpyDataType.i2;
    }

    @Override
    public int size() {
        return data.length;
    }

    @Override
    public Object getElement(int i) {
        return data[i];
    }

    @Override
    public void writeElementTo(int i, ByteBuffer buffer) {
        buffer.putShort(data[i]);
    }

    @Override
    public boolean isShortArray() {
        return true;
    }

    @Override
    public NpyShortArray asShortArray() {
        return this;
    }

    /**
     * Maps every element to {@code true} unless it is zero.
     */
    @Override
    public NpyBooleanArray asBooleanArray() {
        boolean[] booleans = new boolean[data.length];
        for (int i = 0; i < data.length; i++) {
            // test the element value, not the loop index
            booleans[i] = data[i] != 0;
        }
        return new NpyBooleanArray(copyShape(), booleans, fortranOrder);
    }

    /**
     * Converts every element with a narrowing {@code (byte)} cast.
     */
    @Override
    public NpyByteArray asByteArray() {
        byte[] bytes = new byte[data.length];
        for (int i = 0; i < data.length; i++) {
            bytes[i] = (byte) data[i];
        }
        return new NpyByteArray(copyShape(), bytes, fortranOrder);
    }

    /**
     * Widens every element to {@code double}.
     */
    @Override
    public NpyDoubleArray asDoubleArray() {
        double[] doubles = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            doubles[i] = data[i];
        }
        return new NpyDoubleArray(copyShape(), doubles, fortranOrder);
    }

    /**
     * Widens every element to {@code float}.
     */
    @Override
    public NpyFloatArray asFloatArray() {
        float[] floats = new float[data.length];
        for (int i = 0; i < data.length; i++) {
            floats[i] = data[i];
        }
        return new NpyFloatArray(copyShape(), floats, fortranOrder);
    }

    /**
     * Widens every element to {@code int}.
     */
    @Override
    public NpyIntArray asIntArray() {
        int[] ints = new int[data.length];
        for (int i = 0; i < data.length; i++) {
            ints[i] = data[i];
        }
        return new NpyIntArray(copyShape(), ints, fortranOrder);
    }

    /**
     * Widens every element to {@code long}.
     */
    @Override
    public NpyLongArray asLongArray() {
        long[] longs = new long[data.length];
        for (int i = 0; i < data.length; i++) {
            longs[i] = data[i];
        }
        return new NpyLongArray(copyShape(), longs, fortranOrder);
    }
}

View File

@ -0,0 +1,181 @@
package org.meteoinfo.ndarray.io.npy;
import org.meteoinfo.ndarray.Array;
import org.meteoinfo.ndarray.DataType;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
 * Static helpers for reading unsigned and half-precision values from byte
 * buffers and for converting between NPY and MeteoInfo data types and arrays.
 */
public class NpyUtil {
// utility class: not meant to be instantiated
private NpyUtil() {
}
/**
 * Interprets the given byte as an unsigned 8 bit value.
 *
 * @param b the byte to convert
 * @return the value of the byte in the range 0..255
 */
static short u1ToShort(byte b) {
return (short) (b & (short) 0xff);
}
/**
 * Reads the next byte from the buffer and interprets it as an unsigned
 * 8 bit value.
 *
 * @param buffer the buffer to read one byte from
 * @return the value of the byte in the range 0..255
 */
static short u1ToShort(ByteBuffer buffer) {
return (short) (buffer.get() & (short) 0xff);
}
/**
 * Interprets the first two bytes of the given array as an unsigned 16 bit
 * value in the given byte order.
 *
 * @param bytes an array with at least two bytes
 * @param order the byte order of the two bytes
 * @return the value in the range 0..65535
 */
static int u2ToInt(byte[] bytes, ByteOrder order) {
ByteBuffer buffer = ByteBuffer.wrap(bytes, 0, 2)
.order(order);
return u2ToInt(buffer);
}
/**
 * Reads the next two bytes from the buffer (using the byte order of the
 * buffer) and interprets them as an unsigned 16 bit value.
 *
 * @param buffer the buffer to read two bytes from
 * @return the value in the range 0..65535
 */
static int u2ToInt(ByteBuffer buffer) {
short s = buffer.getShort();
return s & 0xffff;
}
/**
 * Interprets the first four bytes of the given array as an unsigned 32 bit
 * value in the given byte order.
 *
 * @param bytes an array with at least four bytes
 * @param order the byte order of the four bytes
 * @return the unsigned value as {@code long}
 */
static long u4ToLong(byte[] bytes, ByteOrder order) {
ByteBuffer buffer = ByteBuffer.wrap(bytes, 0, 4)
.order(order);
return u4ToLong(buffer);
}
/**
 * Reads the next four bytes from the buffer (using the byte order of the
 * buffer) and interprets them as an unsigned 32 bit value.
 *
 * @param buffer the buffer to read four bytes from
 * @return the unsigned value as {@code long}
 */
static long u4ToLong(ByteBuffer buffer) {
int i = buffer.getInt();
return i & 0xffffffffL;
}
/**
 * Reads the next eight bytes from the buffer and interprets them as an
 * unsigned 64 bit value.
 *
 * @param buffer the buffer to read eight bytes from
 * @return the unsigned value
 */
static BigInteger u8ToBigInteger(ByteBuffer buffer) {
long i = buffer.getLong();
// non-negative longs already have the right unsigned value
if (i >= 0L)
return BigInteger.valueOf(i);
// otherwise combine the two 32 bit halves, each taken as unsigned
BigInteger upper = BigInteger.valueOf(Integer.toUnsignedLong((int) (i >>> 32)));
BigInteger lower = BigInteger.valueOf(Integer.toUnsignedLong((int) i));
return upper.shiftLeft(32).add(lower);
}
/**
 * Reads the next two bytes from the buffer as a 16 bit floating point
 * number and converts it to a {@code float}.
 *
 * @param buffer the buffer to read two bytes from
 * @return the converted value
 */
static float f2ToFloat(ByteBuffer buffer) {
return toFloat(buffer.getShort() & 0xffff);
}
/**
* Converts a 16 bit floating point number to a 32 bit floating point number.
* The 16 bits are stored in the given integer parameter, the higher 16 bits
* are ignored. This function was directly taken from here:
* https://stackoverflow.com/a/6162687.
*/
private static float toFloat(int hbits) {
int mant = hbits & 0x03ff; // 10 bits mantissa
int exp = hbits & 0x7c00; // 5 bits exponent
if (exp == 0x7c00) // NaN/Inf
exp = 0x3fc00; // -> NaN/Inf
else if (exp != 0) // normalized value
{
exp += 0x1c000; // exp - 15 + 127
if (mant == 0 && exp > 0x1c400) // smooth transition
return Float.intBitsToFloat((hbits & 0x8000) << 16
| exp << 13 | 0x3ff);
} else if (mant != 0) // && exp==0 -> subnormal
{
exp = 0x1c400; // make it normal
do {
mant <<= 1; // mantissa * 2
exp -= 0x400; // decrease exp by 1
} while ((mant & 0x400) == 0); // while not normal
mant &= 0x3ff; // discard subnormal bit
} // else +/-0 -> +/-0
return Float.intBitsToFloat( // combine all parts
(hbits & 0x8000) << 16 // sign << ( 31 - 15 )
| (exp | mant) << 13); // value << ( 23 - 10 )
}
/**
* Convert Npy data type to MeteoInfo data type. Note that both
* {@code f2} and {@code f4} map to {@code DataType.FLOAT} and both string
* types map to {@code DataType.STRING}.
*
* @param npyDataType Npy data type
* @return MeteoInfo data type
*/
public static DataType toMIDataType(NpyDataType npyDataType) {
switch (npyDataType) {
case i1:
return DataType.BYTE;
case i2:
return DataType.SHORT;
case i4:
return DataType.INT;
case i8:
return DataType.LONG;
case f2:
case f4:
return DataType.FLOAT;
case f8:
return DataType.DOUBLE;
case u1:
return DataType.UBYTE;
case u2:
return DataType.USHORT;
case u4:
return DataType.UINT;
case u8:
return DataType.ULONG;
case bool:
return DataType.BOOLEAN;
case S:
case U:
return DataType.STRING;
default:
return DataType.OBJECT;
}
}
/**
* Convert MeteoInfo data type to npy data type. {@code DataType.STRING}
* is always mapped to {@code NpyDataType.S}.
*
* @param dataType MeteoInfo data type
* @return Npy data type
* @throws NpyFormatException if there is no mapping for the data type
*/
public static NpyDataType toNpyDataType(DataType dataType) {
switch (dataType) {
case BYTE:
return NpyDataType.i1;
case SHORT:
return NpyDataType.i2;
case INT:
return NpyDataType.i4;
case LONG:
return NpyDataType.i8;
case FLOAT:
return NpyDataType.f4;
case DOUBLE:
return NpyDataType.f8;
case UBYTE:
return NpyDataType.u1;
case USHORT:
return NpyDataType.u2;
case UINT:
return NpyDataType.u4;
case ULONG:
return NpyDataType.u8;
case BOOLEAN:
return NpyDataType.bool;
case STRING:
return NpyDataType.S;
default:
throw new NpyFormatException("Unsupported data type: " + dataType);
}
}
/**
* Convert Npy array to MeteoInfo array. A new array with the mapped data
* type and the shape of the Npy array is created and filled element by
* element using the flat element index.
*
* @param npyArray Npy array
* @return MeteoInfo array
*/
public static Array toMIArray(NpyArray npyArray) {
DataType dataType = toMIDataType(npyArray.dataType());
int[] shape = npyArray.shape();
Array array = Array.factory(dataType, shape);
// NOTE(review): elements are copied in storage order and no storage-order
// (Fortran/C) information of npyArray is consulted here; confirm that
// column-major NPY data is handled elsewhere.
for (int i = 0; i < array.getSize(); i++) {
array.setObject(i, npyArray.getElement(i));
}
return array;
}
}

View File

@ -0,0 +1,55 @@
package org.meteoinfo.ndarray.io.npy;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Contains the version information of the first bytes of an NPY file:
 * <ul>
 * <li>the first 6 bytes are the magic string {@code '\x93NUMPY'}</li>
 * <li>bytes 7 and 8 contain the major and minor version</li>
 * </ul>
 */
class NpyVersion {

    final int major;
    final int minor;

    private NpyVersion(int major, int minor) {
        this.major = major;
        this.minor = minor;
    }

    /**
     * Reads the format version from the first 8 bytes of an NPY file. It checks
     * that the array starts with the magic string {@code '\x93NUMPY'} and that
     * the major version is 1, 2, or 3. If this is not the case, it throws
     * an {@link NpyFormatException}.
     *
     * @param bytes at least, the first 8 bytes of an NPY file
     * @return the NPY version of that file
     * @throws NpyFormatException if the array is too short, does not start
     *                            with the magic string, or has an unsupported
     *                            major version
     */
    static NpyVersion of(byte[] bytes) throws NpyFormatException {
        if (bytes.length < 8)
            throw new NpyFormatException("invalid NPY header");
        if (NpyUtil.u1ToShort(bytes[0]) != 0x93)
            throw new NpyFormatException("invalid NPY header");
        // decode with a fixed charset so that the check does not depend on
        // the default charset of the platform
        String numpy = new String(bytes, 1, 5, StandardCharsets.US_ASCII);
        if (!numpy.equals("NUMPY"))
            throw new NpyFormatException("invalid NPY header");

        int major = NpyUtil.u1ToShort(bytes[6]);
        int minor = NpyUtil.u1ToShort(bytes[7]);
        if (major != 1 && major != 2 && major != 3)
            throw new NpyFormatException(
                "unsupported NPY version: " + major);
        return new NpyVersion(major, minor);
    }

    /**
     * Returns the character encoding of the header string: UTF-8 for major
     * versions 3 and above, US-ASCII otherwise.
     */
    Charset headerEncoding() {
        return major >= 3
            ? StandardCharsets.UTF_8
            : StandardCharsets.US_ASCII;
    }
}

View File

@ -0,0 +1,197 @@
package org.meteoinfo.ndarray.io.npy;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.channels.Channels;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.function.Consumer;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import org.meteoinfo.ndarray.Array;
/**
 * Static helpers for listing, reading and writing the entries of NPZ files,
 * which are handled here as zip files.
 */
public class Npz {

    /**
     * Open npz data file
     *
     * @param fileName The file name
     * @return ZipFile object; it is not closed by this method
     * @throws RuntimeException if the file cannot be opened as a zip file
     */
    public static ZipFile open(String fileName) {
        try {
            return new ZipFile(new File(fileName));
        } catch (IOException e) {
            throw new RuntimeException("failed to read zip file: " + fileName, e);
        }
    }

    /**
     * Returns the names of the entries of the given NPZ file.
     *
     * @param fileName a NPZ file name
     * @return the names of the entries of the NPZ file
     */
    public static List<String> entries(String fileName) {
        return entries(new File(fileName));
    }

    /**
     * Returns the names of the entries of the given NPZ file. The file is
     * opened and closed again by this method.
     *
     * @param npz a NPZ file
     * @return the names of the entries of the NPZ file
     * @throws RuntimeException if the file cannot be read as a zip file
     */
    public static List<String> entries(File npz) {
        try (ZipFile zip = new ZipFile(npz)) {
            return entries(zip);
        } catch (IOException e) {
            throw new RuntimeException("failed to read zip file: " + npz, e);
        }
    }

    /**
     * Returns the names of the entries of the given NPZ file. Directory
     * entries are skipped.
     *
     * @param npz a NPZ file
     * @return the names of the entries of the NPZ file
     */
    public static List<String> entries(ZipFile npz) {
        List<String> names = new ArrayList<>();
        // ZipFile.entries() is declared with a wildcard; iterating it as such
        // avoids an unchecked cast
        Enumeration<? extends ZipEntry> zipEntries = npz.entries();
        while (zipEntries.hasMoreElements()) {
            ZipEntry e = zipEntries.nextElement();
            if (e.isDirectory())
                continue;
            names.add(e.getName());
        }
        return names;
    }

    /**
     * Read an array from an entry of a NPZ file.
     *
     * @param npz the NPZ file
     * @param entry the name of the entry in which the array is stored
     * @return the array of the entry
     * @throws IllegalArgumentException if the file has no entry of that name
     * @throws RuntimeException if reading the entry fails
     */
    public static Array load(ZipFile npz, String entry) {
        ZipEntry e = requireEntry(npz, entry);
        try (java.io.InputStream stream = npz.getInputStream(e);
             java.nio.channels.ReadableByteChannel channel = Channels.newChannel(stream)) {
            return Npy.load(channel);
        } catch (IOException ex) {
            throw new RuntimeException("failed to read entry " + entry, ex);
        }
    }

    /**
     * Read an array from an entry of a NPZ file. The file is opened and closed
     * again by this method.
     *
     * @param npz the NPZ file
     * @param entry the name of the entry in which the array is stored
     * @return the NPY array of the entry
     * @throws IllegalArgumentException if the file has no entry of that name
     * @throws RuntimeException if the file or the entry cannot be read
     */
    public static NpyArray<?> read(File npz, String entry) {
        try (ZipFile zip = new ZipFile(npz)) {
            return read(zip, entry);
        } catch (IOException e) {
            throw new RuntimeException("failed to read zip file: " + npz, e);
        }
    }

    /**
     * Read an array from an entry of a NPZ file.
     *
     * @param npz the NPZ file
     * @param entry the name of the entry in which the array is stored
     * @return the NPY array of the entry
     * @throws IllegalArgumentException if the file has no entry of that name
     * @throws RuntimeException if reading the entry fails
     */
    public static NpyArray<?> read(ZipFile npz, String entry) {
        ZipEntry e = requireEntry(npz, entry);
        try (java.io.InputStream stream = npz.getInputStream(e);
             java.nio.channels.ReadableByteChannel channel = Channels.newChannel(stream)) {
            return Npy.read(channel);
        } catch (IOException ex) {
            throw new RuntimeException("failed to read entry " + entry, ex);
        }
    }

    /**
     * Open the given file as an NPZ file. This function is useful when you want
     * to do multiple things with a NPZ file, e.g.
     *
     * <pre>{@code
     * Npz.use(file, npz -> {
     *   for (var entry : Npz.entries(npz)) {
     *     var array = Npz.read(npz, entry);
     *     // ...
     *   }
     * });
     * }</pre>
     *
     * The zip file is closed when the consumer returns.
     *
     * @param npz the NPZ file
     * @param fn a consumer function of the opened NPZ file
     * @throws RuntimeException if the file cannot be opened or closed
     */
    public static void use(File npz, Consumer<ZipFile> fn) {
        try (ZipFile zip = new ZipFile(npz)) {
            fn.accept(zip);
        } catch (IOException e) {
            throw new RuntimeException("failed to use NPZ file " + npz, e);
        }
    }

    /**
     * Creates the given file as a zip output stream, passes the stream to the
     * consumer and closes it afterwards.
     *
     * @param file the NPZ file to create
     * @param fn a consumer that writes the entries
     * @throws RuntimeException if the file cannot be created or closed
     */
    public static void create(File file, Consumer<ZipOutputStream> fn) {
        try (FileOutputStream fileOut = new FileOutputStream(file);
             ZipOutputStream zipOut = new ZipOutputStream(fileOut)) {
            fn.accept(zipOut);
        } catch (IOException e) {
            throw new RuntimeException("failed to create NPZ file: " + file, e);
        }
    }

    /**
     * Creates the given file and returns a zip output stream on it. The stream
     * is not closed by this method.
     *
     * @param file the NPZ file to create
     * @return the opened zip output stream
     * @throws RuntimeException if the file cannot be opened for writing
     */
    public static ZipOutputStream create(File file) {
        try {
            return new ZipOutputStream(new FileOutputStream(file));
        } catch (FileNotFoundException e) {
            throw new RuntimeException("failed to create NPZ file: " + file, e);
        }
    }

    /**
     * Same as {@link #create(File)} for a file name.
     *
     * @param fileName the name of the NPZ file to create
     * @return the opened zip output stream
     */
    public static ZipOutputStream create(String fileName) {
        return create(new File(fileName));
    }

    /**
     * Writes the given array as a new entry into the NPZ stream.
     *
     * @param npz the NPZ output stream
     * @param entry the name of the new entry
     * @param array the array to store
     * @throws RuntimeException if writing the entry fails
     */
    public static void write(ZipOutputStream npz, String entry, Array array) {
        ZipEntry e = new ZipEntry(entry);
        try {
            npz.putNextEntry(e);
            Npy.save(npz, array);
            npz.closeEntry();
        } catch (IOException ex) {
            throw new RuntimeException("failed to write NPZ entry: " + entry, ex);
        }
    }

    /**
     * Writes the given NPY array as a new entry into the NPZ stream.
     *
     * @param npz the NPZ output stream
     * @param entry the name of the new entry
     * @param array the NPY array to store
     * @throws RuntimeException if writing the entry fails
     */
    public static void write(ZipOutputStream npz, String entry, NpyArray<?> array) {
        ZipEntry e = new ZipEntry(entry);
        try {
            npz.putNextEntry(e);
            Npy.write(npz, array);
            npz.closeEntry();
        } catch (IOException ex) {
            throw new RuntimeException("failed to write NPZ entry: " + entry, ex);
        }
    }

    /**
     * Looks up an entry and fails with a descriptive message when it is
     * missing; ZipFile.getEntry returns null in that case, which would
     * otherwise surface as a NullPointerException from getInputStream.
     */
    private static ZipEntry requireEntry(ZipFile npz, String entry) {
        ZipEntry e = npz.getEntry(entry);
        if (e == null)
            throw new IllegalArgumentException(
                    "no such entry in NPZ file " + npz.getName() + ": " + entry);
        return e;
    }
}

View File

@ -0,0 +1,197 @@
package org.meteoinfo.ndarray.io.npy;
import java.math.BigInteger;
/**
 * Converts a 2-dimensional NPY array between column order and row order by
 * copying its elements into a new array with the opposite storage order.
 *
 * Every element is copied unconditionally. Skipping elements that compare
 * equal to zero would replace a floating point {@code -0.0} by {@code +0.0}
 * (because {@code -0.0 == 0} is true) and gains nothing for the other types.
 */
class OrderSwitch2d<T extends NpyArray<?>> {

    private final T array;
    private final int rows;
    private final int cols;

    private OrderSwitch2d(T array) {
        this.array = array;
        this.rows = Array2d.rowCountOf(array);
        this.cols = Array2d.columnCountOf(array);
    }

    /**
     * Returns a copy of the given array in the opposite storage order.
     *
     * @param array the array to convert
     * @return a new array of the same kind with switched storage order
     * @throws IllegalArgumentException if the array type is not supported
     */
    static <T extends NpyArray<?>> T of(T array) {
        return new OrderSwitch2d<T>(array).apply();
    }

    /** Dispatches to the copy routine that matches the element type. */
    @SuppressWarnings("unchecked")
    private T apply() {
        if (array.isBigIntegerArray())
            return (T) switchBigInts((NpyBigIntArray) array);
        if (array.isBooleanArray())
            return (T) switchBooleans(array.asBooleanArray());
        if (array.isByteArray())
            return (T) switchBytes(array.asByteArray());
        if (array.isCharArray())
            return (T) switchChars(array.asCharArray());
        if (array.isDoubleArray())
            return (T) switchDoubles(array.asDoubleArray());
        if (array.isFloatArray())
            return (T) switchFloats(array.asFloatArray());
        if (array.isIntArray())
            return (T) switchInts(array.asIntArray());
        if (array.isLongArray())
            return (T) switchLongs(array.asLongArray());
        if (array.isShortArray())
            return (T) switchShorts(array.asShortArray());
        throw new IllegalArgumentException("unsupported array type: " + array);
    }

    private NpyBigIntArray switchBigInts(NpyBigIntArray a) {
        BigInteger[] data = a.data;
        BigInteger[] newData = new BigInteger[data.length];
        // BigIntegers are immutable, so sharing the references is fine
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyBigIntArray.rowOrderOf(newData, rows, cols)
                : NpyBigIntArray.columnOrderOf(newData, rows, cols);
    }

    private NpyBooleanArray switchBooleans(NpyBooleanArray a) {
        boolean[] data = a.data;
        boolean[] newData = new boolean[data.length];
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyBooleanArray.rowOrderOf(newData, rows, cols)
                : NpyBooleanArray.columnOrderOf(newData, rows, cols);
    }

    private NpyByteArray switchBytes(NpyByteArray a) {
        byte[] data = a.data;
        byte[] newData = new byte[data.length];
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyByteArray.rowOrderOf(newData, rows, cols)
                : NpyByteArray.columnOrderOf(newData, rows, cols);
    }

    private NpyCharArray switchChars(NpyCharArray a) {
        char[] data = a.data;
        char[] newData = new char[data.length];
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        // the last constructor argument is false when the source has column
        // order and true otherwise, i.e. the result gets the opposite order
        return a.hasColumnOrder()
                ? new NpyCharArray(new int[]{rows, cols}, newData, false)
                : new NpyCharArray(new int[]{rows, cols}, newData, true);
    }

    private NpyDoubleArray switchDoubles(NpyDoubleArray a) {
        double[] data = a.data;
        double[] newData = new double[data.length];
        // unconditional copy keeps the sign of -0.0
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyDoubleArray.rowOrderOf(newData, rows, cols)
                : NpyDoubleArray.columnOrderOf(newData, rows, cols);
    }

    private NpyFloatArray switchFloats(NpyFloatArray a) {
        float[] data = a.data;
        float[] newData = new float[data.length];
        // unconditional copy keeps the sign of -0.0f
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyFloatArray.rowOrderOf(newData, rows, cols)
                : NpyFloatArray.columnOrderOf(newData, rows, cols);
    }

    private NpyIntArray switchInts(NpyIntArray a) {
        int[] data = a.data;
        int[] newData = new int[data.length];
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyIntArray.rowOrderOf(newData, rows, cols)
                : NpyIntArray.columnOrderOf(newData, rows, cols);
    }

    private NpyLongArray switchLongs(NpyLongArray a) {
        long[] data = a.data;
        long[] newData = new long[data.length];
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyLongArray.rowOrderOf(newData, rows, cols)
                : NpyLongArray.columnOrderOf(newData, rows, cols);
    }

    private NpyShortArray switchShorts(NpyShortArray a) {
        short[] data = a.data;
        short[] newData = new short[data.length];
        iter((pos, newPos) -> newData[newPos] = data[pos]);
        return a.hasColumnOrder()
                ? NpyShortArray.rowOrderOf(newData, rows, cols)
                : NpyShortArray.columnOrderOf(newData, rows, cols);
    }

    /**
     * Walks the source array in its storage sequence and reports, for every
     * source position, the position of the same (row, col) cell in the
     * opposite storage order.
     */
    private void iter(IndexFn fn) {
        int pos = 0;
        if (array.hasColumnOrder()) {
            // source is stored column by column; target index is row-major
            for (int col = 0; col < cols; col++) {
                for (int row = 0; row < rows; row++) {
                    int newPos = row * cols + col;
                    fn.accept(pos, newPos);
                    pos++;
                }
            }
        } else {
            // source is stored row by row; target index is column-major
            for (int row = 0; row < rows; row++) {
                for (int col = 0; col < cols; col++) {
                    int newPos = col * rows + row;
                    fn.accept(pos, newPos);
                    pos++;
                }
            }
        }
    }

    /** Callback that receives a source position and its target position. */
    @FunctionalInterface
    interface IndexFn {
        void accept(int pos, int newPos);
    }
}

View File

@ -0,0 +1,8 @@
package org.meteoinfo.ndarray.io.npy;
/**
 * A function that takes one argument and produces a primitive {@code float}.
 *
 * @param <T> the type of the function argument
 */
@FunctionalInterface
interface ToFloatFunction<T> {

    /**
     * Applies this function to the given argument.
     *
     * @param input the function argument
     * @return the resulting float
     */
    float applyAsFloat(T input);
}

View File

@ -0,0 +1,8 @@
package org.meteoinfo.ndarray.io.npy;
/**
 * A function that takes one argument and produces a primitive {@code short}.
 *
 * @param <T> the type of the function argument
 */
@FunctionalInterface
interface ToShortFunction<T> {

    /**
     * Applies this function to the given argument.
     *
     * @param input the function argument
     * @return the resulting short
     */
    short applyAsShort(T input);
}

View File

@ -0,0 +1,139 @@
package org.meteoinfo.ndarray.io.npy.dict;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
// https://talks.golang.org/2011/lex.slide#27
class Lexer {
private final char EOF = Character.MIN_VALUE;
private final String input;
private final List<Token> tokens = new ArrayList<>();
private int pos = -1;
private Lexer(String input) {
this.input = input;
}
static List<Token> lex(String input) {
Lexer lexer = new Lexer(input);
lexer.loop();
return lexer.tokens;
}
void loop() {
for (StateFunction stateFn = this::lexText; stateFn != null; ) {
stateFn = stateFn.execute();
}
}
private StateFunction lexText() {
char c = peek();
// skip white spaces
while (Character.isWhitespace(c)) {
pos++;
c = peek();
}
// eof
if (c == EOF) {
tokens.add(Token.eof(pos));
return null;
}
// quoted string
if (c == '"' || c == '\'')
return this::lexString;
// number
if (Character.isDigit(c))
return this::lexNumber;
// identifiers
if (Character.isJavaIdentifierStart(c))
return this::lexIdentifier;
// single character tokens
Optional<Token> charToken = Token.of(pos, c);
if (charToken.isPresent()) {
pos++;
tokens.add(charToken.get());
return this::lexText;
}
// error
tokens.add(Token.error(pos, "unexpected character: '" + c + "'"));
return null;
}
private StateFunction lexString() {
char quote = next();
StringBuilder buffer = new StringBuilder();
int start = pos;
while (true) {
char c = next();
if (c == EOF) {
tokens.add(Token.eof(pos));
return null;
}
if (c == quote)
break;
buffer.append(c);
}
tokens.add(Token.string(start, buffer));
return this::lexText;
}
private StateFunction lexIdentifier() {
StringBuilder buffer = new StringBuilder();
int start = pos + 1;
while (true) {
char c = peek();
if (c == EOF || !Character.isJavaIdentifierPart(c))
break;
pos++;
buffer.append(c);
}
tokens.add(Token.identifier(start, buffer));
return this::lexText;
}
private StateFunction lexNumber() {
StringBuilder buffer = new StringBuilder();
int start = pos + 1;
while (true) {
char c = peek();
if (!Character.isDigit(c))
break;
pos++;
buffer.append(c);
}
tokens.add(Token.integer(start, buffer));
return this::lexText;
}
private char next() {
int nextPos = pos + 1;
if (nextPos >= input.length())
return EOF;
pos = nextPos;
return input.charAt(nextPos);
}
private char peek() {
int nextPos = pos + 1;
return nextPos >= input.length()
? EOF
: input.charAt(nextPos);
}
@FunctionalInterface
interface StateFunction {
StateFunction execute();
}
}

View File

@ -0,0 +1,435 @@
package org.meteoinfo.ndarray.io.npy.dict;
import org.meteoinfo.ndarray.io.npy.NpyByteOrder;
import org.meteoinfo.ndarray.io.npy.NpyDataType;
import org.meteoinfo.ndarray.io.npy.NpyFormatException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Contains the values of the dictionary that is stored in the header of an NPY
 * file.
 */
public class NpyHeaderDict {

    private final NpyDataType dataType;
    private final NpyByteOrder byteOrder;
    private final boolean fortranOrder;
    private final int[] shape;
    private final Map<String, String> properties;
    private final int typeSize;

    private NpyHeaderDict(Builder builder) {
        this.dataType = Objects.requireNonNull(builder.dataType);
        this.fortranOrder = builder.fortranOrder;

        // shape
        this.shape = builder.shape == null
                ? new int[0]
                : Arrays.copyOf(builder.shape, builder.shape.length);

        // byte order
        this.byteOrder = builder.byteOrder == null
                ? NpyByteOrder.NOT_APPLICABLE
                : builder.byteOrder;

        // type size: the fixed size of the data type wins; the builder value
        // is only used for types that report a size of 0
        this.typeSize = dataType.size() != 0
                ? dataType.size()
                : builder.typeSize;

        // additional properties; copied so that later changes to the builder
        // cannot alter this instance
        this.properties = builder.properties != null
                ? new HashMap<String, String>(builder.properties)
                : Collections.<String, String>emptyMap();
    }

    /**
     * Starts a builder for a header dictionary of the given data type.
     *
     * @param dataType the data type of the stored array; must not be null
     * @return a new builder
     */
    public static Builder of(NpyDataType dataType) {
        return new Builder(dataType);
    }

    public NpyDataType dataType() {
        return dataType;
    }

    public NpyByteOrder byteOrder() {
        return byteOrder;
    }

    /**
     * Describes the size of the stored type. The meaning of this field depends
     * on the respective storage type. For numeric types it is in general the
     * number of bytes which are required to store a single value. For strings,
     * it is the number of characters of the string (note that for unicode strings
     * 4 bytes are used to store a single character in NPY and that ASCII strings
     * are stored with an additional null-termination byte).
     *
     * @return the size of the stored type.
     */
    public int typeSize() {
        return typeSize;
    }

    /**
     * Returns {@code true} when the array is stored in Fortran order.
     */
    public boolean hasFortranOrder() {
        return fortranOrder;
    }

    /**
     * Returns the number of dimensions of the array.
     */
    public int dimensions() {
        return shape.length;
    }

    /**
     * Returns the size of the ith dimension of the array.
     *
     * @param i the 0-based dimension for which the size is requested.
     * @return the size of the requested dimension
     * @throws IndexOutOfBoundsException if {@code i < 0 || i >= dimensions()}
     */
    public int sizeOfDimension(int i) {
        if (i < 0 || i >= shape.length)
            throw new IndexOutOfBoundsException(String.valueOf(i));
        return shape[i];
    }

    /**
     * Returns the size of the stored array in number of bytes. That is the
     * number of elements of the stored array times the size of a single
     * element in bytes. For the unicode string type {@code U} an element takes
     * 4 bytes per character, i.e. {@code typeSize() * 4} bytes.
     *
     * @return the size of the stored array in bytes
     */
    public long dataSize() {
        // count in long: the int product of numberOfElements() can overflow
        long elemCount = 1L;
        for (int dim : shape) {
            elemCount *= dim;
        }
        if (dataType.size() == 0 && dataType == NpyDataType.U)
            return elemCount * typeSize * 4L;
        return elemCount * typeSize;
    }

    /**
     * Returns the number of elements that are stored in the array. The
     * product is computed with {@code int} arithmetic, so it wraps around
     * when it exceeds the {@code int} range.
     *
     * @return the number of elements which is the product of all dimension
     *         sizes; 1 for an array without dimensions
     */
    public int numberOfElements() {
        int count = 1;
        for (int dim : shape) {
            count *= dim;
        }
        return count;
    }

    /**
     * Returns the shape of the stored array. Note that this returns a new
     * allocated array each time you call this method.
     *
     * @return the shape of the stored array
     */
    public int[] shape() {
        return Arrays.copyOf(shape, shape.length);
    }

    /**
     * Returns the value of an additional string property.
     *
     * @param key the property key
     * @return the property value or {@code null} if there is no such property
     */
    public String property(String key) {
        return properties.get(key);
    }

    /**
     * Returns a read-only view of the additional string properties.
     */
    public Map<String, String> otherProperties() {
        return properties.isEmpty()
                ? Collections.<String, String>emptyMap()
                : Collections.unmodifiableMap(properties);
    }

    /**
     * Parses the text of an NPY header dictionary.
     *
     * @param s the dictionary text
     * @return the parsed header dictionary
     * @throws NpyFormatException if the text is not a dictionary, the fields
     *         'descr' or 'shape' are missing or malformed, 'fortran_order' is
     *         not True or False, or the data type is not supported
     */
    public static NpyHeaderDict parse(String s) throws NpyFormatException {
        PyValue value = Parser.parse(s);
        if (value.isError())
            throw new NpyFormatException(
                    "invalid header dictionary: " + value.asError().message());
        if (!value.isDict())
            throw new NpyFormatException(
                    "invalid header dictionary; type is " + value.getClass());

        PyDict dict = value.asDict();

        // read the data type
        PyValue typeEntry = dict.get("descr");
        if (typeEntry.isNone())
            throw new NpyFormatException(
                    "invalid header dictionary; data type field 'descr' is missing");
        if (!typeEntry.isString())
            throw new NpyFormatException(
                    "invalid header dictionary; data type field " +
                            "'descr' is not a string but: " + typeEntry);

        String dtype = typeEntry.asString().value();
        NpyDataType dataType = NpyDataType.of(dtype);
        if (dataType == null)
            throw new NpyFormatException(
                    "unsupported data type: " + dtype);

        Builder builder = of(dataType)
                .withShape(getShape(dict))
                .withFortranOrder(getFortranOrder(dict))
                .withByteOrder(NpyDataType.byteOrderOf(dtype));

        // try to set the type size for string types: it is the number that
        // starts at the first digit of the type description
        if (dataType.size() == 0) {
            for (int i = 0; i < dtype.length(); i++) {
                if (!Character.isDigit(dtype.charAt(i)))
                    continue;
                try {
                    builder.withTypeSize(Integer.parseInt(dtype.substring(i)));
                } catch (NumberFormatException ignored) {
                    // best effort: without a readable length the type size
                    // simply stays at the builder default
                }
                break;
            }
        }

        // collect other string properties
        dict.forEach((key, val) -> {
            if (!val.isString())
                return;
            if (key.equals("descr")
                    || key.equals("shape")
                    || key.equals("fortran_order"))
                return;
            builder.withOtherProperty(
                    key, val.asString().value());
        });

        return builder.create();
    }

    /** Reads 'fortran_order'; a missing entry counts as false. */
    private static boolean getFortranOrder(PyDict dict)
            throws NpyFormatException {
        PyValue entry = dict.get("fortran_order");
        if (entry.isNone())
            return false;
        if (!entry.isIdentifier())
            throw new NpyFormatException(
                    "invalid header dictionary: fortran_order must be " +
                            "True or False but was '" + entry + "'");
        String value = entry.asIdentifier().value();
        switch (value) {
            case "True":
                return true;
            case "False":
                return false;
            default:
                throw new NpyFormatException(
                        "invalid header dictionary: fortran_order must be " +
                                "True or False but was '" + value + "'");
        }
    }

    /** Reads the 'shape' tuple as an array of int dimension sizes. */
    private static int[] getShape(PyDict dict) throws NpyFormatException {
        PyValue entry = dict.get("shape");
        if (entry.isNone()) {
            throw new NpyFormatException(
                    "invalid header dictionary: property 'shape' is missing");
        }
        if (!entry.isTuple()) {
            throw new NpyFormatException(
                    "invalid header dictionary: property 'shape' is not a tuple");
        }

        PyTuple tuple = entry.asTuple();
        int[] shape = new int[tuple.size()];
        for (int i = 0; i < tuple.size(); i++) {
            PyValue value = tuple.at(i);
            if (!value.isInt()) {
                throw new NpyFormatException(
                        "invalid header dictionary: argument "
                                + i + " of tuple 'shape' is not an integer");
            }
            // the parsed value is a long; reject what does not fit into an
            // int instead of silently truncating it
            long dim = value.asInt().value();
            if (dim < 0 || dim > Integer.MAX_VALUE) {
                throw new NpyFormatException(
                        "invalid header dictionary: argument "
                                + i + " of tuple 'shape' is out of range: " + dim);
            }
            shape[i] = (int) dim;
        }
        return shape;
    }

    /**
     * Renders this header as dictionary text in the form
     * {@code {'descr': '...', 'fortran_order': ..., 'shape': (...), ...}}.
     * Single quotes in additional property keys and values are replaced by
     * double quotes.
     */
    @Override
    public String toString() {
        // data type: the byte order symbol is omitted for types of size 1,
        // and the type size is appended for types that report a size of 0
        StringBuilder buffer = new StringBuilder("{'descr': '");
        if (dataType.size() != 1) {
            buffer.append(byteOrder.symbol());
        }
        buffer.append(dataType.symbol());
        if (dataType.size() == 0) {
            buffer.append(typeSize);
        }

        // fortran order
        buffer.append("', 'fortran_order': ")
                .append(fortranOrder ? "True" : "False");

        // shape; every dimension is followed by a comma: (3, 4,)
        buffer.append(", 'shape': (");
        for (int i = 0; i < shape.length; i++) {
            if (i > 0) {
                buffer.append(' ');
            }
            buffer.append(shape[i]).append(',');
        }
        buffer.append(")");

        // other properties
        for (Map.Entry<String, String> prop : properties.entrySet()) {
            String key = prop.getKey();
            String val = prop.getValue();
            if (key == null || val == null
                    || "descr".equals(key)
                    || "shape".equals(key)
                    || "fortran_order".equals(key))
                continue;
            buffer.append(", '")
                    .append(key.replace('\'', '"'))
                    .append("': '")
                    .append(val.replace('\'', '"'))
                    .append('\'');
        }

        buffer.append('}');
        return buffer.toString();
    }

    /**
     * Creates the complete NPY header for this dictionary: magic string,
     * version, header length, dictionary text, space padding and a final
     * newline. Version 1 is used by default, version 3 when the dictionary
     * text is not pure ASCII, and version 2 when a version 1 header would
     * be longer than 65535 bytes. The total length is a multiple of 64.
     *
     * @return the header bytes
     */
    public byte[] toNpyHeader() {

        int version = 1;

        // dictionary bytes
        String s = toString();
        boolean allAscii = StandardCharsets.US_ASCII.newEncoder().canEncode(s);
        if (!allAscii) {
            version = 3;
        }
        byte[] dictBytes = allAscii
                ? s.getBytes(StandardCharsets.US_ASCII)
                : s.getBytes(StandardCharsets.UTF_8);

        // calculate the length and padding: 6 bytes magic + 2 bytes version
        // + 2 (version 1) or 4 bytes header length + dictionary + newline
        int filled = version == 1
                ? 11 + dictBytes.length
                : 13 + dictBytes.length;
        int padding = 64 - (filled % 64);
        int totalLen = filled + padding;
        if (version == 1 && totalLen > 65535) {
            version = 2;
            filled = 13 + dictBytes.length;
            padding = 64 - (filled % 64);
            totalLen = filled + padding;
        }

        ByteBuffer buf = ByteBuffer.allocate(totalLen);
        buf.order(ByteOrder.LITTLE_ENDIAN);

        // magic
        buf.put((byte) 0x93);
        buf.put("NUMPY".getBytes(StandardCharsets.US_ASCII));

        // version
        buf.put((byte) version);
        buf.put((byte) 0);

        // header length: everything after the length field itself
        if (version == 1) {
            buf.putShort((short) (totalLen - 10));
        } else {
            buf.putInt(totalLen - 12);
        }

        // write the dictionary, the padding and the terminating newline
        buf.put(dictBytes);
        for (int i = 0; i < padding; i++) {
            buf.put((byte) ' ');
        }
        buf.put((byte) '\n');
        return buf.array();
    }

    /**
     * Collects the values of a header dictionary.
     */
    public static class Builder {

        private final NpyDataType dataType;
        private int[] shape;
        private NpyByteOrder byteOrder;
        private boolean fortranOrder;
        private Map<String, String> properties;
        private int typeSize;

        private Builder(NpyDataType dataType) {
            this.dataType = Objects.requireNonNull(dataType);
        }

        public Builder withShape(int[] shape) {
            this.shape = shape;
            return this;
        }

        public Builder withByteOrder(NpyByteOrder byteOrder) {
            this.byteOrder = byteOrder;
            return this;
        }

        public Builder withFortranOrder(boolean b) {
            this.fortranOrder = b;
            return this;
        }

        /**
         * Set the size of the stored type. This field must be set when the stored
         * data type is a string. In this case the size of the type is the number
         * of characters of the string.
         *
         * @param typeSize the size of the stored type
         * @return this builder
         */
        public Builder withTypeSize(int typeSize) {
            this.typeSize = typeSize;
            return this;
        }

        /**
         * Adds an additional string property. Calls with a null key or a null
         * value are ignored.
         *
         * @param key the property key
         * @param value the property value
         * @return this builder
         */
        public Builder withOtherProperty(String key, String value) {
            if (key == null || value == null)
                return this;
            if (properties == null) {
                properties = new HashMap<>();
            }
            properties.put(key, value);
            return this;
        }

        public NpyHeaderDict create() {
            return new NpyHeaderDict(this);
        }
    }
}

View File

@ -0,0 +1,161 @@
package org.meteoinfo.ndarray.io.npy.dict;
import java.util.ArrayList;
import java.util.List;
class Parser {
private final List<Token> tokens;
private int pos = -1;
private Parser(List<Token> tokens) {
this.tokens = tokens;
}
static PyValue parse(String text) {
if (text == null)
return PyError.of("empty input");
List<Token> tokens = Lexer.lex(text);
if (tokens.isEmpty())
return PyError.of("empty input");
// check if there is an error token
for (Token token : tokens) {
if (token.type == TokenType.ERROR)
return PyError.of(
"syntax error: " + token.value + "; at " + token.position);
}
// parse the value and make sure that it is followed by EOF
Parser parser = new Parser(tokens);
PyValue value = parser.parseNext();
if (value.isError())
return value;
Token next = parser.next();
if (!next.isEof())
return PyError.of(
"syntax error: expected EOF at "
+ next.position + " but found: " + next);
return value;
}
private PyValue parseNext() {
Token token = peek();
switch (token.type) {
case IDENTIFIER:
moveNext();
return new PyIdentifier(token.value);
case INTEGER:
moveNext();
try {
long value = Long.parseLong(token.value);
return new PyInt(value);
} catch (NumberFormatException e) {
return PyError.of(
"failed to parse integer: '"
+ token.value + "' at:" + token.position);
}
case STRING:
moveNext();
return new PyString(token.value);
case TUPLE_START:
return parseTuple();
case DICT_START:
return parseDict();
default:
return PyError.of(
"syntax error: unexpected token '"
+ token.value + "' at " + token.position);
}
}
private PyValue parseTuple() {
Token start = next();
if (start.type != TokenType.TUPLE_START)
return PyError.of(
"syntax error: expected tuple start at " + start.position);
ArrayList<PyValue> values = new ArrayList<PyValue>();
boolean head = true;
while (true) {
Token next = peek();
if (next.isEof())
return PyError.of("syntax error: unexpected end of tuple");
if (next.type == TokenType.TUPLE_END) {
moveNext();
break;
}
if (!head) {
if (next.type != TokenType.COMMA)
return PyError.of("syntax error: unexpected token: " + next);
head = true;
moveNext();
continue;
}
PyValue value = parseNext();
if (value.isError())
return value;
values.add(value);
head = false;
}
return new PyTuple(values);
}
private PyValue parseDict() {
Token start = next();
if (start.type != TokenType.DICT_START)
return PyError.of(
"syntax error: expected dict start at " + start.position);
PyDict dict = new PyDict();
boolean head = true;
while (true) {
Token next = next();
if (next.type == TokenType.DICT_END)
break;
if (!head) {
if (next.type != TokenType.COMMA)
return PyError.of("syntax error: unexpected token: " + next);
head = true;
continue;
}
if (next.type != TokenType.STRING)
return PyError.of(
"syntax error: only string keys are allowed but found: " + next);
String key = next.value;
Token colon = next();
if (colon.type != TokenType.COLON)
return PyError.of(
"syntax error: expected colon but found: " + next);
PyValue value = parseNext();
if (value.isError())
return value;
dict.put(key, value);
head = false;
}
return dict;
}
private Token peek() {
int nextPos = pos + 1;
return nextPos < tokens.size()
? tokens.get(nextPos)
: Token.eof(-1);
}
private Token next() {
Token peeked = peek();
if (peeked.type != TokenType.EOF) {
pos++;
}
return peeked;
}
private void moveNext() {
pos++;
}
}

View File

@ -0,0 +1,63 @@
package org.meteoinfo.ndarray.io.npy.dict;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.BiConsumer;
/**
 * A dictionary value: key-value pairs kept in insertion order. Keys are not
 * made unique; a lookup returns the value of the first pair with that key.
 */
final class PyDict implements PyValue {

    private final List<Entry> entries = new ArrayList<>();

    @Override
    public boolean isDict() {
        return true;
    }

    /** Appends a pair; calls with a null key or a null value are ignored. */
    void put(String key, PyValue value) {
        if (key != null && value != null) {
            entries.add(new Entry(key, value));
        }
    }

    /** Returns the first value stored under the key, or the None value. */
    PyValue get(String key) {
        for (int i = 0; i < entries.size(); i++) {
            Entry candidate = entries.get(i);
            if (Objects.equals(key, candidate.key())) {
                return candidate.value();
            }
        }
        return PyNone.get();
    }

    /** Returns the number of stored pairs. */
    public int size() {
        return entries.size();
    }

    /** Passes every pair to the given function, in insertion order. */
    void forEach(BiConsumer<String, PyValue> fn) {
        for (Entry pair : entries) {
            fn.accept(pair.key(), pair.value());
        }
    }

    /** A single key-value pair. */
    static class Entry {

        private final String key;
        private final PyValue value;

        Entry(String key, PyValue value) {
            this.key = key;
            this.value = value;
        }

        String key() {
            return key;
        }

        PyValue value() {
            return value;
        }
    }
}

View File

@ -0,0 +1,24 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * A value that stands for a failure and carries its message.
 */
final class PyError implements PyValue {

    private final String message;

    PyError(String message) {
        this.message = message;
    }

    /** Creates an error value with the given message. */
    static PyError of(String message) {
        return new PyError(message);
    }

    /** Returns the message of this error. */
    String message() {
        return message;
    }

    @Override
    public boolean isError() {
        return true;
    }
}

View File

@ -0,0 +1,19 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * An identifier value; it wraps the identifier text.
 */
final class PyIdentifier implements PyValue {

    private final String value;

    PyIdentifier(String value) {
        this.value = value;
    }

    /** Returns the identifier text. */
    public String value() {
        return value;
    }

    @Override
    public boolean isIdentifier() {
        return true;
    }
}

View File

@ -0,0 +1,19 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * An integer value, stored as a {@code long}.
 */
final class PyInt implements PyValue {

    private final long value;

    PyInt(long value) {
        this.value = value;
    }

    /** Returns the wrapped number. */
    public long value() {
        return value;
    }

    @Override
    public boolean isInt() {
        return true;
    }
}

View File

@ -0,0 +1,18 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * The "no value" value; there is exactly one shared instance.
 */
class PyNone implements PyValue {

    private static final PyNone instance = new PyNone();

    private PyNone() {
        // only the shared instance exists
    }

    /** Returns the shared instance. */
    static PyNone get() {
        return instance;
    }

    @Override
    public boolean isNone() {
        return true;
    }
}

View File

@ -0,0 +1,19 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * A string value; it wraps the string text.
 */
final class PyString implements PyValue {

    private final String value;

    PyString(String value) {
        this.value = value;
    }

    /** Returns the wrapped text. */
    public String value() {
        return value;
    }

    @Override
    public boolean isString() {
        return true;
    }
}

View File

@ -0,0 +1,33 @@
package org.meteoinfo.ndarray.io.npy.dict;
import java.util.Collection;
/**
 * A tuple value: a fixed sequence of values.
 */
final class PyTuple implements PyValue {

    private final PyValue[] values;

    /** Copies the given values, in iteration order, into this tuple. */
    PyTuple(Collection<? extends PyValue> values) {
        this.values = values.toArray(new PyValue[0]);
    }

    @Override
    public boolean isTuple() {
        return true;
    }

    /** Returns the number of values in this tuple. */
    int size() {
        return values.length;
    }

    /**
     * Returns the value at the given 0-based position.
     *
     * @throws IndexOutOfBoundsException if {@code i < 0 || i >= size()}
     */
    PyValue at(int i) {
        boolean inRange = i >= 0 && i < values.length;
        if (!inRange) {
            throw new IndexOutOfBoundsException(String.valueOf(i));
        }
        return values[i];
    }
}

View File

@ -0,0 +1,60 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * Common type of all parsed values. Every kind of value has an {@code isX}
 * test, which is {@code false} unless an implementation overrides it, and an
 * {@code asX} accessor, which is a plain cast and therefore fails with a
 * {@code ClassCastException} when the value is of another kind.
 */
interface PyValue {

    // --- None ---

    default boolean isNone() {
        return false;
    }

    default PyNone asNone() {
        return (PyNone) this;
    }

    // --- error ---

    default boolean isError() {
        return false;
    }

    default PyError asError() {
        return (PyError) this;
    }

    // --- containers ---

    default boolean isDict() {
        return false;
    }

    default PyDict asDict() {
        return (PyDict) this;
    }

    default boolean isTuple() {
        return false;
    }

    default PyTuple asTuple() {
        return (PyTuple) this;
    }

    // --- scalars ---

    default boolean isString() {
        return false;
    }

    default PyString asString() {
        return (PyString) this;
    }

    default boolean isInt() {
        return false;
    }

    default PyInt asInt() {
        return (PyInt) this;
    }

    default boolean isIdentifier() {
        return false;
    }

    default PyIdentifier asIdentifier() {
        return (PyIdentifier) this;
    }
}

View File

@ -0,0 +1,62 @@
package org.meteoinfo.ndarray.io.npy.dict;
import java.util.Optional;
/**
 * A lexical token: its type, its text and the position it was reported at.
 */
class Token {

    final int position;
    final TokenType type;
    final String value;

    Token(int position, TokenType type, String value) {
        this.position = position;
        this.type = type;
        this.value = value;
    }

    /** Creates the single-character token for {@code c}, if there is one. */
    static Optional<Token> of(int position, char c) {
        TokenType type = TokenType.of(c);
        if (type == null) {
            return Optional.empty();
        }
        return Optional.of(new Token(position, type, Character.toString(c)));
    }

    /** Creates a STRING token with the buffer content as value. */
    static Token string(int position, StringBuilder value) {
        return new Token(position, TokenType.STRING, value.toString());
    }

    /** Creates an IDENTIFIER token with the buffer content as value. */
    static Token identifier(int position, StringBuilder value) {
        return new Token(position, TokenType.IDENTIFIER, value.toString());
    }

    /** Creates an INTEGER token with the buffer content as value. */
    static Token integer(int position, StringBuilder value) {
        return new Token(position, TokenType.INTEGER, value.toString());
    }

    /** Creates an EOF token; its value is the text "EOF". */
    static Token eof(int position) {
        return new Token(position, TokenType.EOF, "EOF");
    }

    /** Creates an ERROR token with the given message as value. */
    static Token error(int position, String value) {
        return new Token(position, TokenType.ERROR, value);
    }

    boolean isEof() {
        return type == TokenType.EOF;
    }

    /**
     * Returns a display form: "EOF" for EOF tokens, the value prefixed with
     * "ERROR: " for errors, the value in single quotes for strings and the
     * plain value for everything else.
     */
    @Override
    public String toString() {
        final String text;
        switch (type) {
            case EOF:
                text = "EOF";
                break;
            case ERROR:
                text = "ERROR: " + value;
                break;
            case STRING:
                text = "'" + value + "'";
                break;
            default:
                text = value;
                break;
        }
        return text;
    }
}

View File

@ -0,0 +1,49 @@
package org.meteoinfo.ndarray.io.npy.dict;
/**
 * The kinds of tokens the lexer can produce.
 */
enum TokenType {

    IDENTIFIER,

    STRING,

    INTEGER,

    COMMA,

    COLON,

    DICT_START,

    DICT_END,

    TUPLE_START,

    TUPLE_END,

    EOF,

    ERROR;

    /**
     * Looks up the token type that consists of exactly the given character.
     *
     * @param c the character to look up
     * @return the matching single-character token type, or {@code null} when
     *         the character is not a token on its own
     */
    static TokenType of(char c) {
        if (c == ',')
            return COMMA;
        if (c == ':')
            return COLON;
        if (c == '{')
            return DICT_START;
        if (c == '}')
            return DICT_END;
        if (c == '(')
            return TUPLE_START;
        if (c == ')')
            return TUPLE_END;
        return null;
    }
}