Introducing cache.invalidate() and invalidate_all() (#3276)

* Introducing cache.invalidate() and invalidate_all()

Resolves #3275

* Improve documentation of cache module

* Even more documentation
This commit is contained in:
Sean Gillies 2024-12-14 19:03:25 -07:00 committed by GitHub
parent bea623a5af
commit 64474c1795
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 73 additions and 1 deletions

View File

@ -6,6 +6,8 @@ Changes
New features:
- The cache module and its invalidate() and invalidate_all() functions
allow invalidation of responses in Rasterio's HTTP cache (#3276).
- The CRS class has a new, lazily computed, geodetic_crs property (#3218)
1.4.3 (2024-12-02)

View File

@ -1,4 +1,4 @@
# cython: boundscheck=False, c_string_type=unicode, c_string_encoding=utf8"""
# cython: boundscheck=False, c_string_type=unicode, c_string_encoding=utf8
"""Numpy-free base classes."""

54
rasterio/cache.pyx Normal file
View File

@ -0,0 +1,54 @@
# cython: c_string_type=unicode, c_string_encoding=utf8
"""Rasterio caches HTTP responses using GDAL's VSI CURL cache.
A global LRU cache of 16 MB shared among all downloaded content is
enabled by default, and content in it may be reused after a dataset has
been closed and reopened. Responses from FTP and HTTP servers, including
cloud storage like AWS S3 and Microsoft Azure, are stored in this cache.
Responses from HTTP servers used by Rasterio's Python openers are not.
The size of the cache and which responses are stored can be controlled
by GDAL's CPL_VSIL_CURL_CACHE_SIZE and CPL_VSIL_CURL_NON_CACHED
configuration options. For details see
https://gdal.org/en/latest/user/configoptions.html.
"""
include "gdal.pxi"
from rasterio._path import _parse_path
def invalidate(pattern):
    """Invalidate selected responses in Rasterio's HTTP cache.

    Parameters
    ----------
    pattern : str
        Prefix that selects the responses to drop: every cached
        response beginning with this pattern is invalidated. A whole
        website can be targeted with a pattern such as
        "https://example.com", and a whole S3 bucket with a pattern
        such as "s3://example.com" ("example.com" being the bucket
        name). Append path segments to narrow the selection: with
        "s3://example.com/prefix" only responses for objects of the
        "example.com" bucket whose key begins with "prefix" are
        invalidated.

    Returns
    -------
    None

    """
    # Convert the user-facing pattern to the VSI form of the parsed path.
    vsi_name = _parse_path(pattern).as_vsi()
    # The C function takes a char pointer, so pass UTF-8 encoded bytes.
    encoded_name = vsi_name.encode('utf-8')
    VSICurlPartialClearCache(encoded_name)
def invalidate_all():
    """Invalidate every response held in Rasterio's HTTP cache.

    Takes no arguments; the whole cache is cleared in a single call to
    VSICurlClearCache().

    Returns
    -------
    None

    """
    VSICurlClearCache()

View File

@ -146,6 +146,8 @@ cdef extern from "cpl_vsi.h" nogil:
size_t VSIFWriteL(void *buffer, size_t nSize, size_t nCount, VSILFILE *fp)
int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf)
void VSICurlPartialClearCache(const char *)
void VSICurlClearCache()
IF (CTE_GDAL_MAJOR_VERSION, CTE_GDAL_MINOR_VERSION) >= (3, 9):
cdef extern from "cpl_vsi.h" nogil:

View File

@ -228,6 +228,7 @@ if "clean" not in sys.argv:
Extension("rasterio._err", ["rasterio/_err.pyx"], **ext_options),
Extension("rasterio._example", ["rasterio/_example.pyx"], **ext_options),
Extension("rasterio._version", ["rasterio/_version.pyx"], **ext_options),
Extension("rasterio.cache", ["rasterio/cache.pyx"], **ext_options),
Extension("rasterio.crs", ["rasterio/crs.pyx"], **ext_options),
Extension("rasterio.shutil", ["rasterio/shutil.pyx"], **ext_options),
Extension("rasterio._transform", ["rasterio/_transform.pyx"], **ext_options),

13
tests/test_cache.py Normal file
View File

@ -0,0 +1,13 @@
"""Tests of GDAL VSI cache invalidation."""
from rasterio import cache
def test_invalidate_all():
    """Invalidating the entire cache runs without error and returns None."""
    # The call must not raise, and its documented return value is None.
    assert cache.invalidate_all() is None
def test_invalidate_pattern():
    """Invalidating by pattern runs without error and returns None."""
    # The call must not raise, and its documented return value is None.
    assert cache.invalidate("https://example.com") is None