diff --git a/CHANGES.txt b/CHANGES.txt index fc48361f..000d7f64 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -4,7 +4,10 @@ Changes 1.3.8 (2023-06-26) ------------------ -- Prevent a crash when accessing the block shapes of a multidataset HDF5 file (#). +- Rasterio's Python file VSI plugin is now compatible with GDAL VRTs such as + the one used for boundless reads of datasets (#2856). +- Prevent a crash when accessing the block shapes of a multidataset HDF5 file + (#2859). - Add a workaround for a GDAL multithreading bug introduced in 3.6.0 (#2851). 1.3.7 (2023-05-22) diff --git a/rasterio/__init__.py b/rasterio/__init__.py index d4b594bd..c5a87fd9 100644 --- a/rasterio/__init__.py +++ b/rasterio/__init__.py @@ -81,7 +81,7 @@ except ImportError: have_vsi_plugin = False __all__ = ['band', 'open', 'pad', 'Env', 'CRS'] -__version__ = "1.3.7dev" +__version__ = "1.3.8dev" __gdal_version__ = gdal_version() __proj_version__ = ".".join([str(version) for version in get_proj_version()]) __geos_version__ = ".".join([str(version) for version in get_geos_version()]) diff --git a/rasterio/_filepath.pyx b/rasterio/_filepath.pyx index 3a888782..bec03459 100644 --- a/rasterio/_filepath.pyx +++ b/rasterio/_filepath.pyx @@ -69,7 +69,7 @@ cdef bytes FILESYSTEM_PREFIX_BYTES = FILESYSTEM_PREFIX.encode("ascii") # Currently the only way to "create" a file in the filesystem is to add # an entry to this dictionary. GDAL will then Open the path later. cdef _FILESYSTEM_INFO = {} - +cdef _OPEN_FILE_OBJS = set() cdef int install_filepath_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct): """Install handlers for python file-like objects if it isn't already installed.""" @@ -97,13 +97,33 @@ cdef void uninstall_filepath_plugin(VSIFilesystemPluginCallbacksStruct *callback ## Filesystem Functions +def clone_file_obj(fobj): + """Clone a filelike object. + + Supports BytesIO, MemoryFile, fsspec files, and Python file objects. 
+ + """ + if hasattr(fobj, "fs"): + new_fobj = fobj.fs.open(fobj.path, fobj.mode) + elif hasattr(fobj, "getbuffer"): + new_fobj = fobj.__class__(fobj.getbuffer()) + else: + new_fobj = open(fobj.name, fobj.mode) + + return new_fobj + + cdef void* filepath_open(void *pUserData, const char *pszFilename, const char *pszAccess) with gil: - """Access existing open file-like object in the virtual filesystem. + """Access files in the virtual filesystem. This function is mandatory in the GDAL Filesystem Plugin API. + This function returns clones of the file wrappers stored in + _FILESYSTEM_INFO. GDAL may call this function multiple times per + filename and each result must be separately seekable. + """ - cdef object file_wrapper + cdef object file_obj if pszAccess != b"r" and pszAccess != b"rb": log.error("FilePath is currently a read-only interface.") @@ -115,36 +135,33 @@ cdef void* filepath_open(void *pUserData, const char *pszFilename, const char *p cdef dict filesystem_info = pUserData try: - file_wrapper = filesystem_info[pszFilename] + file_obj = clone_file_obj(filesystem_info[pszFilename]) except KeyError: log.info("Object not found in virtual filesystem: filename=%r", pszFilename) return NULL - if not hasattr(file_wrapper, "_file_obj"): - log.error("Unexpected file object found in FilePath filesystem.") - return NULL - return file_wrapper + # Open file wrappers are kept in this set and removed when closed. + _OPEN_FILE_OBJS.add(file_obj) + + return file_obj ## File functions cdef vsi_l_offset filepath_tell(void *pFile) with gil: - cdef object file_wrapper = pFile - cdef object file_obj = file_wrapper._file_obj + cdef object file_obj = pFile cdef long pos = file_obj.tell() return pos cdef int filepath_seek(void *pFile, vsi_l_offset nOffset, int nWhence) except -1 with gil: - cdef object file_wrapper = pFile - cdef object file_obj = file_wrapper._file_obj + cdef object file_obj = pFile # TODO: Add "seekable" check? 
file_obj.seek(nOffset, nWhence) return 0 cdef size_t filepath_read(void *pFile, void *pBuffer, size_t nSize, size_t nCount) with gil: - cdef object file_wrapper = pFile - cdef object file_obj = file_wrapper._file_obj + cdef object file_obj = pFile cdef bytes python_data = file_obj.read(nSize * nCount) cdef int num_bytes = len(python_data) # NOTE: We have to cast to char* first, otherwise Cython doesn't do the conversion properly @@ -153,11 +170,8 @@ cdef size_t filepath_read(void *pFile, void *pBuffer, size_t nSize, size_t nCoun cdef int filepath_close(void *pFile) except -1 with gil: - # Optional - cdef object file_wrapper = pFile - cdef object file_obj = file_wrapper._file_obj - file_obj.seek(0) - _ = _FILESYSTEM_INFO.pop(file_wrapper._filepath_path, None) + cdef object file_obj = pFile + _OPEN_FILE_OBJS.remove(file_obj) return 0 @@ -183,19 +197,18 @@ cdef class FilePathBase: # auxiliary files. self._dirname = dirname or str(uuid4()) - if filename: - # GDAL's SRTMHGT driver requires the filename to be "correct" (match - # the bounds being written) - self.name = "{0}{1}/{2}".format(FILESYSTEM_PREFIX, self._dirname, filename) - else: - self.name = "{0}{1}/{1}".format(FILESYSTEM_PREFIX, self._dirname) + # GDAL's SRTMHGT driver requires the filename to be "correct" (match + # the bounds being written). + self._filename = filename or self._dirname + + self.name = "{0}{1}/{2}".format(FILESYSTEM_PREFIX, self._dirname, self._filename) self._path = self.name.encode('utf-8') self._filepath_path = self._path[len(FILESYSTEM_PREFIX):] self._file_obj = filelike_obj self.mode = "r" + _FILESYSTEM_INFO[self._filepath_path] = self._file_obj self.closed = False - _FILESYSTEM_INFO[self._filepath_path] = self def exists(self): """Test if the in-memory file exists. @@ -234,4 +247,5 @@ cdef class FilePathBase: to the user. 
""" + _ = _FILESYSTEM_INFO.pop(self._filepath_path) self.closed = True diff --git a/rasterio/_io.pyx b/rasterio/_io.pyx index 90bf5614..f6ee638d 100644 --- a/rasterio/_io.pyx +++ b/rasterio/_io.pyx @@ -1182,10 +1182,12 @@ cdef class MemoryFileBase: cdef VSILFILE *fp = NULL if file_or_bytes: - if hasattr(file_or_bytes, 'read'): + if hasattr(file_or_bytes, "read"): initial_bytes = file_or_bytes.read() elif isinstance(file_or_bytes, bytes): initial_bytes = file_or_bytes + elif hasattr(file_or_bytes, "itemsize"): + initial_bytes = bytes(file_or_bytes) else: raise TypeError( "Constructor argument must be a file opened in binary " @@ -1196,16 +1198,11 @@ cdef class MemoryFileBase: # Make an in-memory directory specific to this dataset to help organize # auxiliary files. self._dirname = dirname or str(uuid4()) - VSIMkdir("/vsimem/{0}".format(self._dirname).encode("utf-8"), 0666) + self._filename = filename or f"{self._dirname}.{ext.lstrip('.')}" - if filename: - # GDAL's SRTMHGT driver requires the filename to be "correct" (match - # the bounds being written) - self.name = "/vsimem/{0}/{1}".format(self._dirname, filename) - else: - # GDAL 2.1 requires a .zip extension for zipped files. 
- self.name = "/vsimem/{0}/{0}.{1}".format(self._dirname, ext.lstrip('.')) + VSIMkdir(f"/vsimem/{self._dirname}".encode('utf-8'), 0666) + self.name = f"/vsimem/{self._dirname}/{self._filename}" self._path = self.name.encode('utf-8') self._initial_bytes = initial_bytes diff --git a/requirements.txt b/requirements.txt index 15b76b9e..b6e2a000 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ matplotlib numpy>=1.10 snuggs~=1.4.0 setuptools>=20.0 +pyparsing~=3.1 diff --git a/tests/test_filepath.py b/tests/test_filepath.py index 1c11ca0e..d95dc9db 100644 --- a/tests/test_filepath.py +++ b/tests/test_filepath.py @@ -10,6 +10,7 @@ import pytest import rasterio from rasterio.enums import MaskFlags from rasterio.shutil import copyfiles +from rasterio.windows import Window try: from rasterio.io import FilePath @@ -52,6 +53,33 @@ def test_initial_bytes(rgb_file_object): with vsifile.open() as src: assert src.driver == 'GTiff' assert src.count == 3 + assert src.dtypes == ("uint8", "uint8", "uint8") + assert src.read().shape == (3, 718, 791) + + +def test_initial_bytes_boundless(rgb_file_object): + """FilePath contents can be initialized from bytes and opened.""" + with FilePath(rgb_file_object) as vsifile: + with vsifile.open() as src: + assert src.driver == "GTiff" + assert src.count == 3 + assert src.dtypes == ("uint8", "uint8", "uint8") + assert src.read(window=Window(0, 0, 800, 800), boundless=True).shape == ( + 3, + 800, + 800, + ) + + +def test_filepath_vrt(rgb_file_object): + """A FilePath can be wrapped by a VRT.""" + from rasterio.vrt import _boundless_vrt_doc + + with FilePath(rgb_file_object) as vsifile, vsifile.open() as dst: + vrt_doc = _boundless_vrt_doc(dst) + with rasterio.open(vrt_doc) as src: + assert src.driver == "VRT" + assert src.count == 3 assert src.dtypes == ('uint8', 'uint8', 'uint8') assert src.read().shape == (3, 718, 791)