mirror of
https://github.com/rasterio/rasterio.git
synced 2025-12-08 17:36:12 +00:00
Improve compatibility of FilePath with GDAL's VSI system (#2856)
* Improve compatibility of FilePath with GDAL's VSI system Resolves #2850 * Pyparsing 3.1 has changes * Store and track file objects, not file wrappers
This commit is contained in:
parent
e2dcbcef13
commit
0560836d5f
@ -4,7 +4,10 @@ Changes
|
||||
1.3.8 (2023-06-26)
|
||||
------------------
|
||||
|
||||
- Prevent a crash when accessing the block shapes of a multidataset HDF5 file (#).
|
||||
- Rasterio's Python file VSI plugin is now compatible with GDAL VRTs such as
|
||||
the one used for boundless reads of datasets (#2856).
|
||||
- Prevent a crash when accessing the block shapes of a multidataset HDF5 file
|
||||
(#2859).
|
||||
- Add a workaround for a GDAL multithreading bug introduced in 3.6.0 (#2851).
|
||||
|
||||
1.3.7 (2023-05-22)
|
||||
|
||||
@ -81,7 +81,7 @@ except ImportError:
|
||||
have_vsi_plugin = False
|
||||
|
||||
__all__ = ['band', 'open', 'pad', 'Env', 'CRS']
|
||||
__version__ = "1.3.7dev"
|
||||
__version__ = "1.3.8dev"
|
||||
__gdal_version__ = gdal_version()
|
||||
__proj_version__ = ".".join([str(version) for version in get_proj_version()])
|
||||
__geos_version__ = ".".join([str(version) for version in get_geos_version()])
|
||||
|
||||
@ -69,7 +69,7 @@ cdef bytes FILESYSTEM_PREFIX_BYTES = FILESYSTEM_PREFIX.encode("ascii")
|
||||
# Currently the only way to "create" a file in the filesystem is to add
|
||||
# an entry to this dictionary. GDAL will then Open the path later.
|
||||
cdef _FILESYSTEM_INFO = {}
|
||||
|
||||
cdef _OPEN_FILE_OBJS = set()
|
||||
|
||||
cdef int install_filepath_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct):
|
||||
"""Install handlers for python file-like objects if it isn't already installed."""
|
||||
@ -97,13 +97,33 @@ cdef void uninstall_filepath_plugin(VSIFilesystemPluginCallbacksStruct *callback
|
||||
|
||||
## Filesystem Functions
|
||||
|
||||
def clone_file_obj(fobj):
    """Clone a filelike object.

    Supports BytesIO, MemoryFile, fsspec files, and Python file objects.

    The kind of object is detected by attribute, in this order:

    * an ``fs`` attribute: the object is reopened with
      ``fobj.fs.open(fobj.path, fobj.mode)``;
    * a ``getbuffer`` method (e.g. ``io.BytesIO``): a new instance of the
      same class is constructed from the buffer;
    * otherwise the object is treated as a regular Python file and is
      reopened by ``fobj.name``.

    Parameters
    ----------
    fobj : file-like object
        The object to clone. This function does not seek or close it.

    Returns
    -------
    file-like object
        A new object with its own position, distinct from ``fobj``.

    """
    if hasattr(fobj, "fs"):
        new_fobj = fobj.fs.open(fobj.path, fobj.mode)
    elif hasattr(fobj, "getbuffer"):
        new_fobj = fobj.__class__(fobj.getbuffer())
    else:
        # Clones are only ever read from (FilePath is a read-only
        # interface), so reopen local files read-only in binary mode
        # instead of replaying fobj.mode: an exclusive-create mode such
        # as "xb+" would raise FileExistsError and a "w" mode would
        # truncate the very file being cloned.
        new_fobj = open(fobj.name, "rb")

    return new_fobj
|
||||
|
||||
|
||||
cdef void* filepath_open(void *pUserData, const char *pszFilename, const char *pszAccess) with gil:
|
||||
"""Access existing open file-like object in the virtual filesystem.
|
||||
"""Access files in the virtual filesystem.
|
||||
|
||||
This function is mandatory in the GDAL Filesystem Plugin API.
|
||||
|
||||
This function returns clones of the file objects stored in
|
||||
_FILESYSTEM_INFO. GDAL may call this function multiple times per
|
||||
filename and each result must be separately seekable.
|
||||
|
||||
"""
|
||||
cdef object file_wrapper
|
||||
cdef object file_obj
|
||||
|
||||
if pszAccess != b"r" and pszAccess != b"rb":
|
||||
log.error("FilePath is currently a read-only interface.")
|
||||
@ -115,36 +135,33 @@ cdef void* filepath_open(void *pUserData, const char *pszFilename, const char *p
|
||||
cdef dict filesystem_info = <object>pUserData
|
||||
|
||||
try:
|
||||
file_wrapper = filesystem_info[pszFilename]
|
||||
file_obj = clone_file_obj(filesystem_info[pszFilename])
|
||||
except KeyError:
|
||||
log.info("Object not found in virtual filesystem: filename=%r", pszFilename)
|
||||
return NULL
|
||||
|
||||
if not hasattr(file_wrapper, "_file_obj"):
|
||||
log.error("Unexpected file object found in FilePath filesystem.")
|
||||
return NULL
|
||||
return <void *>file_wrapper
|
||||
# Open file objects are kept in this set and removed when closed.
|
||||
_OPEN_FILE_OBJS.add(file_obj)
|
||||
|
||||
return <void *>file_obj
|
||||
|
||||
## File functions
|
||||
|
||||
cdef vsi_l_offset filepath_tell(void *pFile) with gil:
|
||||
cdef object file_wrapper = <object>pFile
|
||||
cdef object file_obj = file_wrapper._file_obj
|
||||
cdef object file_obj = <object>pFile
|
||||
cdef long pos = file_obj.tell()
|
||||
return <vsi_l_offset>pos
|
||||
|
||||
|
||||
cdef int filepath_seek(void *pFile, vsi_l_offset nOffset, int nWhence) except -1 with gil:
|
||||
cdef object file_wrapper = <object>pFile
|
||||
cdef object file_obj = file_wrapper._file_obj
|
||||
cdef object file_obj = <object>pFile
|
||||
# TODO: Add "seekable" check?
|
||||
file_obj.seek(nOffset, nWhence)
|
||||
return 0
|
||||
|
||||
|
||||
cdef size_t filepath_read(void *pFile, void *pBuffer, size_t nSize, size_t nCount) with gil:
|
||||
cdef object file_wrapper = <object>pFile
|
||||
cdef object file_obj = file_wrapper._file_obj
|
||||
cdef object file_obj = <object>pFile
|
||||
cdef bytes python_data = file_obj.read(nSize * nCount)
|
||||
cdef int num_bytes = len(python_data)
|
||||
# NOTE: We have to cast to char* first, otherwise Cython doesn't do the conversion properly
|
||||
@ -153,11 +170,8 @@ cdef size_t filepath_read(void *pFile, void *pBuffer, size_t nSize, size_t nCoun
|
||||
|
||||
|
||||
cdef int filepath_close(void *pFile) except -1 with gil:
|
||||
# Optional
|
||||
cdef object file_wrapper = <object>pFile
|
||||
cdef object file_obj = file_wrapper._file_obj
|
||||
file_obj.seek(0)
|
||||
_ = _FILESYSTEM_INFO.pop(file_wrapper._filepath_path, None)
|
||||
cdef object file_obj = <object>pFile
|
||||
_OPEN_FILE_OBJS.remove(file_obj)
|
||||
return 0
|
||||
|
||||
|
||||
@ -183,19 +197,18 @@ cdef class FilePathBase:
|
||||
# auxiliary files.
|
||||
self._dirname = dirname or str(uuid4())
|
||||
|
||||
if filename:
|
||||
# GDAL's SRTMHGT driver requires the filename to be "correct" (match
|
||||
# the bounds being written)
|
||||
self.name = "{0}{1}/{2}".format(FILESYSTEM_PREFIX, self._dirname, filename)
|
||||
else:
|
||||
self.name = "{0}{1}/{1}".format(FILESYSTEM_PREFIX, self._dirname)
|
||||
# GDAL's SRTMHGT driver requires the filename to be "correct" (match
|
||||
# the bounds being written).
|
||||
self._filename = filename or self._dirname
|
||||
|
||||
self.name = "{0}{1}/{2}".format(FILESYSTEM_PREFIX, self._dirname, self._filename)
|
||||
|
||||
self._path = self.name.encode('utf-8')
|
||||
self._filepath_path = self._path[len(FILESYSTEM_PREFIX):]
|
||||
self._file_obj = filelike_obj
|
||||
self.mode = "r"
|
||||
_FILESYSTEM_INFO[self._filepath_path] = self._file_obj
|
||||
self.closed = False
|
||||
_FILESYSTEM_INFO[self._filepath_path] = self
|
||||
|
||||
def exists(self):
|
||||
"""Test if the in-memory file exists.
|
||||
@ -234,4 +247,5 @@ cdef class FilePathBase:
|
||||
to the user.
|
||||
|
||||
"""
|
||||
_ = _FILESYSTEM_INFO.pop(self._filepath_path)
|
||||
self.closed = True
|
||||
|
||||
@ -1182,10 +1182,12 @@ cdef class MemoryFileBase:
|
||||
cdef VSILFILE *fp = NULL
|
||||
|
||||
if file_or_bytes:
|
||||
if hasattr(file_or_bytes, 'read'):
|
||||
if hasattr(file_or_bytes, "read"):
|
||||
initial_bytes = file_or_bytes.read()
|
||||
elif isinstance(file_or_bytes, bytes):
|
||||
initial_bytes = file_or_bytes
|
||||
elif hasattr(file_or_bytes, "itemsize"):
|
||||
initial_bytes = bytes(file_or_bytes)
|
||||
else:
|
||||
raise TypeError(
|
||||
"Constructor argument must be a file opened in binary "
|
||||
@ -1196,16 +1198,11 @@ cdef class MemoryFileBase:
|
||||
# Make an in-memory directory specific to this dataset to help organize
|
||||
# auxiliary files.
|
||||
self._dirname = dirname or str(uuid4())
|
||||
VSIMkdir("/vsimem/{0}".format(self._dirname).encode("utf-8"), 0666)
|
||||
self._filename = filename or f"{self._dirname}.{ext.lstrip('.')}"
|
||||
|
||||
if filename:
|
||||
# GDAL's SRTMHGT driver requires the filename to be "correct" (match
|
||||
# the bounds being written)
|
||||
self.name = "/vsimem/{0}/{1}".format(self._dirname, filename)
|
||||
else:
|
||||
# GDAL 2.1 requires a .zip extension for zipped files.
|
||||
self.name = "/vsimem/{0}/{0}.{1}".format(self._dirname, ext.lstrip('.'))
|
||||
VSIMkdir(f"/vsimem/{self._dirname}".encode('utf-8'), 0666)
|
||||
|
||||
self.name = f"/vsimem/{self._dirname}/{self._filename}"
|
||||
self._path = self.name.encode('utf-8')
|
||||
|
||||
self._initial_bytes = initial_bytes
|
||||
|
||||
@ -11,3 +11,4 @@ matplotlib
|
||||
numpy>=1.10
|
||||
snuggs~=1.4.0
|
||||
setuptools>=20.0
|
||||
pyparsing~=3.1
|
||||
|
||||
@ -10,6 +10,7 @@ import pytest
|
||||
import rasterio
|
||||
from rasterio.enums import MaskFlags
|
||||
from rasterio.shutil import copyfiles
|
||||
from rasterio.windows import Window
|
||||
|
||||
try:
|
||||
from rasterio.io import FilePath
|
||||
@ -52,6 +53,33 @@ def test_initial_bytes(rgb_file_object):
|
||||
with vsifile.open() as src:
|
||||
assert src.driver == 'GTiff'
|
||||
assert src.count == 3
|
||||
assert src.dtypes == ("uint8", "uint8", "uint8")
|
||||
assert src.read().shape == (3, 718, 791)
|
||||
|
||||
|
||||
def test_initial_bytes_boundless(rgb_file_object):
    """A dataset opened through FilePath supports boundless windowed reads."""
    # 800 x 800 window; the sibling tests read this fixture as (3, 718, 791).
    window = Window(0, 0, 800, 800)
    with FilePath(rgb_file_object) as vsifile, vsifile.open() as src:
        assert src.driver == "GTiff"
        assert src.count == 3
        assert src.dtypes == ("uint8",) * 3
        data = src.read(window=window, boundless=True)
        assert data.shape == (3, 800, 800)
|
||||
|
||||
|
||||
def test_filepath_vrt(rgb_file_object):
    """Open a VRT document built from a FilePath-backed dataset and read it."""
    from rasterio.vrt import _boundless_vrt_doc

    with FilePath(rgb_file_object) as vsifile:
        with vsifile.open() as dst:
            with rasterio.open(_boundless_vrt_doc(dst)) as src:
                assert src.driver == "VRT"
                assert src.count == 3
                assert src.dtypes == ("uint8",) * 3
                data = src.read()
                assert data.shape == (3, 718, 791)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user