From 64474c1795d5d47c03e271386d21c3b244769946 Mon Sep 17 00:00:00 2001 From: Sean Gillies Date: Sat, 14 Dec 2024 19:03:25 -0700 Subject: [PATCH] Introducing cache.invalidate() and invalidate_all() (#3276) * Introducing cache.invalidate() and invalidate_all() Resolves #3275 * Improve documentation of cache module * Even more documentation --- CHANGES.txt | 2 ++ rasterio/_base.pyx | 2 +- rasterio/cache.pyx | 54 +++++++++++++++++++++++++++++++++++++++++++++ rasterio/gdal.pxi | 2 ++ setup.py | 1 + tests/test_cache.py | 13 +++++++++++ 6 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 rasterio/cache.pyx create mode 100644 tests/test_cache.py diff --git a/CHANGES.txt b/CHANGES.txt index ed0fca4a..3d23d43b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -6,6 +6,8 @@ Changes New features: +- The cache module and its invalidate() and invalidate_all() functions + allow invalidation of responses in Rasterio's HTTP cache (#3276). - The CRS class has a new, lazily computed, geodetic_crs property (#3218) 1.4.3 (2024-12-02) diff --git a/rasterio/_base.pyx b/rasterio/_base.pyx index 8562b7d4..79504863 100644 --- a/rasterio/_base.pyx +++ b/rasterio/_base.pyx @@ -1,4 +1,4 @@ -# cython: boundscheck=False, c_string_type=unicode, c_string_encoding=utf8""" +# cython: boundscheck=False, c_string_type=unicode, c_string_encoding=utf8 """Numpy-free base classes.""" diff --git a/rasterio/cache.pyx b/rasterio/cache.pyx new file mode 100644 index 00000000..ef3af74f --- /dev/null +++ b/rasterio/cache.pyx @@ -0,0 +1,54 @@ +# cython: c_string_type=unicode, c_string_encoding=utf8 + +"""Rasterio caches HTTP responses using GDAL's VSI CURL cache. + +A global LRU cache of 16 MB shared among all downloaded content is +enabled by default, and content in it may be reused after a dataset has +been closed and reopened. Responses from FTP and HTTP servers, including +cloud storage like AWS S3 and Microsoft Azure, are stored in this cache. 
+Responses from HTTP servers used by Rasterio's Python openers are not. + +The size of the cache and which responses are stored can be controlled +by GDAL's CPL_VSIL_CURL_CACHE_SIZE and CPL_VSIL_CURL_NON_CACHED +configuration options. For details, see +https://gdal.org/en/latest/user/configoptions.html. +""" + +include "gdal.pxi" + +from rasterio._path import _parse_path + + +def invalidate(pattern): + """Invalidate responses in Rasterio's HTTP cache + + Parameters + ---------- + pattern : str + All responses beginning with this pattern will be invalidated. + Responses served from a particular website can be invalidated + using a pattern like "https://example.com". Responses served + from an S3 bucket can be invalidated using a pattern like + "s3://example.com", where "example.com" is the bucket name. + Invalidation can be made more selective by appending path + segments to the pattern. "s3://example.com/prefix" will + invalidate only responses served for requests for objects in the + "example.com" bucket that have a key beginning with "prefix". 
+ + Returns + ------- + None + """ + path = _parse_path(pattern).as_vsi() + path = path.encode('utf-8') + VSICurlPartialClearCache(path) + + +def invalidate_all(): + """Invalidate all responses in Rasterio's HTTP cache + + Returns + ------- + None + """ + VSICurlClearCache() diff --git a/rasterio/gdal.pxi b/rasterio/gdal.pxi index b7762b57..7d12b333 100644 --- a/rasterio/gdal.pxi +++ b/rasterio/gdal.pxi @@ -146,6 +146,8 @@ cdef extern from "cpl_vsi.h" nogil: size_t VSIFWriteL(void *buffer, size_t nSize, size_t nCount, VSILFILE *fp) int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf) + void VSICurlPartialClearCache(const char *) + void VSICurlClearCache() IF (CTE_GDAL_MAJOR_VERSION, CTE_GDAL_MINOR_VERSION) >= (3, 9): cdef extern from "cpl_vsi.h" nogil: diff --git a/setup.py b/setup.py index 25f45dea..3685452f 100755 --- a/setup.py +++ b/setup.py @@ -228,6 +228,7 @@ if "clean" not in sys.argv: Extension("rasterio._err", ["rasterio/_err.pyx"], **ext_options), Extension("rasterio._example", ["rasterio/_example.pyx"], **ext_options), Extension("rasterio._version", ["rasterio/_version.pyx"], **ext_options), + Extension("rasterio.cache", ["rasterio/cache.pyx"], **ext_options), Extension("rasterio.crs", ["rasterio/crs.pyx"], **ext_options), Extension("rasterio.shutil", ["rasterio/shutil.pyx"], **ext_options), Extension("rasterio._transform", ["rasterio/_transform.pyx"], **ext_options), diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 00000000..15ad4eec --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,13 @@ +"""Tests of GDAL VSI cache invalidation.""" + +from rasterio import cache + + +def test_invalidate_all(): + """Cache is entirely invalidated.""" + cache.invalidate_all() + + +def test_invalidate_pattern(): + """Cache is partially invalidated.""" + cache.invalidate("https://example.com")