From 5dbcc6f40f70b51a02202ccb48d82ae97e448595 Mon Sep 17 00:00:00 2001 From: Sean Gillies Date: Thu, 12 Feb 2015 13:52:40 -0700 Subject: [PATCH] Yo, I hear you like parentheses... Old syntax traded in for an easy to parse lisp-like syntax. No need for a temporary variable and much more secure. --- rasterio/rio/calc.py | 126 ++++++++++++++++++----------------------- requirements-dev.txt | 1 + requirements.txt | 1 + setup.py | 3 +- tests/test_rio_calc.py | 75 ++++-------------------- 5 files changed, 71 insertions(+), 135 deletions(-) diff --git a/rasterio/rio/calc.py b/rasterio/rio/calc.py index c2de9e30..5ccc9b79 100644 --- a/rasterio/rio/calc.py +++ b/rasterio/rio/calc.py @@ -8,7 +8,7 @@ import sys import warnings import click -from cligj import files_inout_arg +import parsnip import rasterio from rasterio.rio.cli import cli @@ -22,6 +22,9 @@ from rasterio.rio.cli import cli type=click.Path(resolve_path=False), required=True, metavar="INPUTS... OUTPUT") +@click.option('--name', multiple=True, + help='Specify an input file with a unique short (alphas only) name ' + 'for use in commands like "a=tests/data/RGB.byte.tif".') @click.option('--dtype', type=click.Choice([ 'ubyte', 'uint8', 'uint16', 'int16', 'uint32', @@ -29,23 +32,44 @@ from rasterio.rio.cli import cli default='float64', help="Output data type (default: float64).") @click.pass_context -def calc(ctx, command, files, dtype): +def calc(ctx, command, files, name, dtype): """A raster data calculator - Applies one or more commands to a set of input datasets and writes - the results to a new dataset. + Evaluates an expression using input datasets and writes the result + to a new dataset. - Command syntax is a work in progress. Currently: - - * {n} represents the n-th input dataset (a 3-D array) - * {n,m} represents the m-th band of the n-th dataset (a 2-D array). - * Standard numpy array operators (+, -, *, /) are available. - * Multiple commands delimited by ; may be executed. - * The result of the previous command is represented by {}. - * When the final result is a tuple of arrays, a multi band output - file is written. - * When the final result is a single array, a single band output - file is written. + Command syntax is lisp-like. An expression consists of an operator + or function name and one or more strings, numbers, or expressions + enclosed in parentheses. Functions include ``ra`` (gets a raster + array) and ``list`` (makes a list of arrays). + + \b + * (ra i) evaluates to the i-th input dataset (a 3-D array). + * (ra i j) evaluates to the j-th band of the i-th dataset (a 2-D + array). + * (ra foo j) evaluates to the j-th band of a dataset named foo (see + help on the --name option above). + * Standard numpy array operators (+, -, *, /) are available. + * When the final result is a list of arrays, a multi band output + file is written. + * When the final result is a single array, a single band output + file is written. + + Example: + + \b + $ rio calc "(+ (* (ra 1) 0.95) 2)" tests/data/RGB.byte.tif \\ + > /tmp/out.tif --dtype ubyte + + Produces a 3-band GeoTIFF with all values scaled by 0.95 and + incremented by 2. + + \b + $ rio calc "(list (+ (ra 1) 125) (ra 1) (ra 1))" \\ + > tests/data/shade.tif /tmp/out.tif --dtype ubyte + + Produces a 3-band RGB GeoTIFF, with red levels incremented by 125, + from the single-band input. """ import numpy as np @@ -56,74 +80,34 @@ def calc(ctx, command, files, dtype): try: with rasterio.drivers(CPL_DEBUG=verbosity>2): output = files[-1] - files = files[:-1] - with rasterio.open(files[0]) as first: + inputs = ( + [tuple(n.split('=')) for n in name] + + [(None, n) for n in files[:-1]]) + + with rasterio.open(inputs[0][1]) as first: kwargs = first.meta kwargs['transform'] = kwargs.pop('affine') kwargs['dtype'] = dtype - names = [] - sources = [] - for path in files: + ctxkwds = {} + for name, path in inputs: with rasterio.open(path) as src: - names.append(src.name) # Using the class method instead of instance method. # Latter raises + # # TypeError: astype() got an unexpected keyword argument 'copy' - # Possibly something to do with the instance being a masked + # + # possibly something to do with the instance being a masked # array. - sources.append( - np.ndarray.astype(src.read(), 'float64', copy=False)) + ctxkwds[name or src.name] = np.ndarray.astype( + src.read(), 'float64', copy=False) - #sources = np.ma.asanyarray([s for s in sources]) + with parsnip.ctx(**ctxkwds): + res = parsnip.handleLine(command) - parts = command.split(';') - _prev = None - - def cmd_sources(match): - text = match.group(1) - parts = text.split(',') - v = parts.pop(0) - if v in names: - a = names.index(v) - s = 'sources[%d]' % a - if parts: - s += '[%d]' % (int(parts.pop(0)) - 1) - return s - - for part in filter(lambda p: p.strip(), parts): - - # TODO: implement a real parser for calc expressions, - # perhaps using numexpr's parser as a guide, instead - # eval'ing any string. - - # Translate '{}' to '_prev'. - cmd = re.sub(r'{}', '_prev', part) - - cmd = re.sub( - r'{(\d+),(\d+)}', - lambda m: 'sources[%d][%d]' % ( - int(m.group(1))-1, - int(m.group(2))-1), - cmd) - - cmd = re.sub( - r'{(\d+)}', - lambda m: 'sources[%d]' % (int(m.group(1))-1), - cmd) - - cmd = re.sub(r'{(.+)}', cmd_sources, cmd) - - logger.debug("Translated cmd: %r", cmd) - - res = eval(cmd) - _prev = res - - if isinstance(res, tuple) or len(res.shape) == 3: - results = np.asanyarray([ - np.ndarray.astype(r, dtype, copy=False - ) for r in res]) + if len(res.shape) == 3: + results = np.ndarray.astype(res, dtype, copy=False) else: results = np.asanyarray( [np.ndarray.astype(res, dtype, copy=False)]) diff --git a/requirements-dev.txt b/requirements-dev.txt index 4d9ff531..b3da13eb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,6 +5,7 @@ cython>=0.20 delocate enum34 numpy>=1.8.0 +git+git://github.com/mapbox/parsnip#egg=parsnip pytest setuptools>=0.9.8 wheel diff --git a/requirements.txt b/requirements.txt index 66a3935a..a22fd8e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ affine cligj enum34 numpy>=1.8.0 +parsnip setuptools diff --git a/setup.py b/setup.py index 1788e526..778a6d31 100755 --- a/setup.py +++ b/setup.py @@ -169,7 +169,8 @@ with open('README.rst') as f: inst_reqs = [ 'affine>=1.0', 'cligj', - 'Numpy>=1.7' ] + 'Numpy>=1.7', + 'parsnip'] if sys.version_info < (3, 4): inst_reqs.append('enum34') diff --git a/tests/test_rio_calc.py b/tests/test_rio_calc.py index ee461f5a..0248ae8b 100644 --- a/tests/test_rio_calc.py +++ b/tests/test_rio_calc.py @@ -14,7 +14,7 @@ def test_err(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '0.10*{1}.upper()', 'tests/data/shade.tif', outfile], + '(& 0.1 (ra 1))', 'tests/data/shade.tif', outfile], catch_exceptions=False) assert result.exit_code == 1 @@ -23,7 +23,7 @@ def test_multiband_calc(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '0.10*{1} + 125', 'tests/data/shade.tif', outfile], + '(+ 125 (* 0.1 (ra 1)))', 'tests/data/shade.tif', outfile], catch_exceptions=False) assert result.exit_code == 0 with rasterio.open(outfile) as src: @@ -33,11 +33,13 @@ def test_multiband_calc(tmpdir): assert data.min() == 125 -def test_singleband_calc(tmpdir): +def test_singleband_calc_byindex(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '0.10*{1,1} + 125', 'tests/data/shade.tif', outfile], + '(+ 125 (* 0.1 (ra 1 1)))', + 'tests/data/shade.tif', + outfile], catch_exceptions=False) assert result.exit_code == 0 with rasterio.open(outfile) as src: @@ -47,12 +49,12 @@ def test_singleband_calc(tmpdir): assert data.min() == 125 -def test_singleband_calc_by_name(tmpdir): +def test_singleband_calc_byname(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '0.10*{tests/data/shade.tif,1} + 125', - 'tests/data/shade.tif', + '(+ 125 (* 0.1 (ra shade 1)))', + '--name', 'shade=tests/data/shade.tif', outfile], catch_exceptions=False) assert result.exit_code == 0 @@ -69,7 +71,7 @@ def test_parts_calc(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '({1,1} + 125, {1,1}, {1,1})', + '(list (+ (ra 1 1) 125) (ra 1 1) (ra 1 1))', '--dtype', 'uint8', 'tests/data/shade.tif', outfile], @@ -89,7 +91,7 @@ def test_parts_calc_2(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '({1,1} + {1,2} + {1,3})/3', + '(+ (+ (/ (ra 1 1) 3) (/ (ra 1 2) 3)) (/ (ra 1 3) 3))', '--dtype', 'uint8', 'tests/data/RGB.byte.tif', outfile], @@ -102,64 +104,11 @@ def test_parts_calc_2(tmpdir): assert round(data.mean(), 1) == 60.3 -def test_parts_calc_tempval(tmpdir): - # Produce greyscale output from the RGB file. - outfile = str(tmpdir.join('out.tif')) - runner = CliRunner() - result = runner.invoke(calc, [ - '{1,1}; {} + {1,2}; {} + {1,3}; {}; ({}/3)', - '--dtype', 'uint8', - 'tests/data/RGB.byte.tif', - outfile], - catch_exceptions=False) - assert result.exit_code == 0 - with rasterio.open(outfile) as src: - assert src.count == 1 - assert src.meta['dtype'] == 'uint8' - data = src.read() - assert round(data.mean(), 1) == 60.3 - - - def test_copy_rgb(tmpdir): outfile = str(tmpdir.join('out.tif')) runner = CliRunner() result = runner.invoke(calc, [ - '{1}', - '--dtype', 'uint8', - 'tests/data/RGB.byte.tif', - outfile], - catch_exceptions=False) - assert result.exit_code == 0 - with rasterio.open(outfile) as src: - assert src.count == 3 - assert src.meta['dtype'] == 'uint8' - data = src.read() - assert round(data.mean(), 1) == 60.6 - - -def test_copy_rgb_tempval(tmpdir): - outfile = str(tmpdir.join('out.tif')) - runner = CliRunner() - result = runner.invoke(calc, [ - '{1}; {}', - '--dtype', 'uint8', - 'tests/data/RGB.byte.tif', - outfile], - catch_exceptions=False) - assert result.exit_code == 0 - with rasterio.open(outfile) as src: - assert src.count == 3 - assert src.meta['dtype'] == 'uint8' - data = src.read() - assert round(data.mean(), 1) == 60.6 - - -def test_copy_rgb_by_name(tmpdir): - outfile = str(tmpdir.join('out.tif')) - runner = CliRunner() - result = runner.invoke(calc, [ - '{tests/data/RGB.byte.tif}', + '(ra 1)', '--dtype', 'uint8', 'tests/data/RGB.byte.tif', outfile],