Yo, I hear you like parentheses...

The old command syntax has been traded in for an easy-to-parse, Lisp-like syntax.
A temporary variable is no longer needed, and calc is much more secure because commands are no longer passed to eval().
This commit is contained in:
Sean Gillies 2015-02-12 13:52:40 -07:00
parent c6d8bfeb17
commit 5dbcc6f40f
5 changed files with 71 additions and 135 deletions

View File

@ -8,7 +8,7 @@ import sys
import warnings
import click
from cligj import files_inout_arg
import parsnip
import rasterio
from rasterio.rio.cli import cli
@ -22,6 +22,9 @@ from rasterio.rio.cli import cli
type=click.Path(resolve_path=False),
required=True,
metavar="INPUTS... OUTPUT")
@click.option('--name', multiple=True,
help='Specify an input file with a unique short (alphas only) name '
'for use in commands like "a=tests/data/RGB.byte.tif".')
@click.option('--dtype',
type=click.Choice([
'ubyte', 'uint8', 'uint16', 'int16', 'uint32',
@ -29,23 +32,44 @@ from rasterio.rio.cli import cli
default='float64',
help="Output data type (default: float64).")
@click.pass_context
def calc(ctx, command, files, dtype):
def calc(ctx, command, files, name, dtype):
"""A raster data calculator
Applies one or more commands to a set of input datasets and writes
the results to a new dataset.
Evaluates an expression using input datasets and writes the result
to a new dataset.
Command syntax is a work in progress. Currently:
* {n} represents the n-th input dataset (a 3-D array)
* {n,m} represents the m-th band of the n-th dataset (a 2-D array).
* Standard numpy array operators (+, -, *, /) are available.
* Multiple commands delimited by ; may be executed.
* The result of the previous command is represented by {}.
* When the final result is a tuple of arrays, a multi band output
file is written.
* When the final result is a single array, a single band output
file is written.
Command syntax is lisp-like. An expression consists of an operator
or function name and one or more strings, numbers, or expressions
enclosed in parentheses. Functions include ``ra`` (gets a raster
array) and ``list`` (makes a list of arrays).
\b
* (ra i) evaluates to the i-th input dataset (a 3-D array).
* (ra i j) evaluates to the j-th band of the i-th dataset (a 2-D
array).
* (ra foo j) evaluates to the j-th band of a dataset named foo (see
help on the --name option above).
* Standard numpy array operators (+, -, *, /) are available.
* When the final result is a list of arrays, a multi band output
file is written.
* When the final result is a single array, a single band output
file is written.
Example:
\b
$ rio calc "(+ (* (ra 1) 0.95) 2)" tests/data/RGB.byte.tif \\
> /tmp/out.tif --dtype ubyte
Produces a 3-band GeoTIFF with all values scaled by 0.95 and
incremented by 2.
\b
$ rio calc "(list (+ (ra 1) 125) (ra 1) (ra 1))" \\
> tests/data/shade.tif /tmp/out.tif --dtype ubyte
Produces a 3-band RGB GeoTIFF, with red levels incremented by 125,
from the single-band input.
"""
import numpy as np
@ -56,74 +80,34 @@ def calc(ctx, command, files, dtype):
try:
with rasterio.drivers(CPL_DEBUG=verbosity>2):
output = files[-1]
files = files[:-1]
with rasterio.open(files[0]) as first:
inputs = (
[tuple(n.split('=')) for n in name] +
[(None, n) for n in files[:-1]])
with rasterio.open(inputs[0][1]) as first:
kwargs = first.meta
kwargs['transform'] = kwargs.pop('affine')
kwargs['dtype'] = dtype
names = []
sources = []
for path in files:
ctxkwds = {}
for name, path in inputs:
with rasterio.open(path) as src:
names.append(src.name)
# Using the class method instead of instance method.
# Latter raises
#
# TypeError: astype() got an unexpected keyword argument 'copy'
# Possibly something to do with the instance being a masked
#
# possibly something to do with the instance being a masked
# array.
sources.append(
np.ndarray.astype(src.read(), 'float64', copy=False))
ctxkwds[name or src.name] = np.ndarray.astype(
src.read(), 'float64', copy=False)
#sources = np.ma.asanyarray([s for s in sources])
with parsnip.ctx(**ctxkwds):
res = parsnip.handleLine(command)
parts = command.split(';')
_prev = None
def cmd_sources(match):
text = match.group(1)
parts = text.split(',')
v = parts.pop(0)
if v in names:
a = names.index(v)
s = 'sources[%d]' % a
if parts:
s += '[%d]' % (int(parts.pop(0)) - 1)
return s
for part in filter(lambda p: p.strip(), parts):
# TODO: implement a real parser for calc expressions,
# perhaps using numexpr's parser as a guide, instead
# eval'ing any string.
# Translate '{}' to '_prev'.
cmd = re.sub(r'{}', '_prev', part)
cmd = re.sub(
r'{(\d+),(\d+)}',
lambda m: 'sources[%d][%d]' % (
int(m.group(1))-1,
int(m.group(2))-1),
cmd)
cmd = re.sub(
r'{(\d+)}',
lambda m: 'sources[%d]' % (int(m.group(1))-1),
cmd)
cmd = re.sub(r'{(.+)}', cmd_sources, cmd)
logger.debug("Translated cmd: %r", cmd)
res = eval(cmd)
_prev = res
if isinstance(res, tuple) or len(res.shape) == 3:
results = np.asanyarray([
np.ndarray.astype(r, dtype, copy=False
) for r in res])
if len(res.shape) == 3:
results = np.ndarray.astype(res, dtype, copy=False)
else:
results = np.asanyarray(
[np.ndarray.astype(res, dtype, copy=False)])

View File

@ -5,6 +5,7 @@ cython>=0.20
delocate
enum34
numpy>=1.8.0
git+git://github.com/mapbox/parsnip#egg=parsnip
pytest
setuptools>=0.9.8
wheel

View File

@ -2,4 +2,5 @@ affine
cligj
enum34
numpy>=1.8.0
parsnip
setuptools

View File

@ -169,7 +169,8 @@ with open('README.rst') as f:
inst_reqs = [
'affine>=1.0',
'cligj',
'Numpy>=1.7' ]
'Numpy>=1.7',
'parsnip']
if sys.version_info < (3, 4):
inst_reqs.append('enum34')

View File

@ -14,7 +14,7 @@ def test_err(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'0.10*{1}.upper()', 'tests/data/shade.tif', outfile],
'(& 0.1 (ra 1))', 'tests/data/shade.tif', outfile],
catch_exceptions=False)
assert result.exit_code == 1
@ -23,7 +23,7 @@ def test_multiband_calc(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'0.10*{1} + 125', 'tests/data/shade.tif', outfile],
'(+ 125 (* 0.1 (ra 1)))', 'tests/data/shade.tif', outfile],
catch_exceptions=False)
assert result.exit_code == 0
with rasterio.open(outfile) as src:
@ -33,11 +33,13 @@ def test_multiband_calc(tmpdir):
assert data.min() == 125
def test_singleband_calc(tmpdir):
def test_singleband_calc_byindex(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'0.10*{1,1} + 125', 'tests/data/shade.tif', outfile],
'(+ 125 (* 0.1 (ra 1 1)))',
'tests/data/shade.tif',
outfile],
catch_exceptions=False)
assert result.exit_code == 0
with rasterio.open(outfile) as src:
@ -47,12 +49,12 @@ def test_singleband_calc(tmpdir):
assert data.min() == 125
def test_singleband_calc_by_name(tmpdir):
def test_singleband_calc_byname(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'0.10*{tests/data/shade.tif,1} + 125',
'tests/data/shade.tif',
'(+ 125 (* 0.1 (ra shade 1)))',
'--name', 'shade=tests/data/shade.tif',
outfile],
catch_exceptions=False)
assert result.exit_code == 0
@ -69,7 +71,7 @@ def test_parts_calc(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'({1,1} + 125, {1,1}, {1,1})',
'(list (+ (ra 1 1) 125) (ra 1 1) (ra 1 1))',
'--dtype', 'uint8',
'tests/data/shade.tif',
outfile],
@ -89,7 +91,7 @@ def test_parts_calc_2(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'({1,1} + {1,2} + {1,3})/3',
'(+ (+ (/ (ra 1 1) 3) (/ (ra 1 2) 3)) (/ (ra 1 3) 3))',
'--dtype', 'uint8',
'tests/data/RGB.byte.tif',
outfile],
@ -102,64 +104,11 @@ def test_parts_calc_2(tmpdir):
assert round(data.mean(), 1) == 60.3
def test_parts_calc_tempval(tmpdir):
# Produce greyscale output from the RGB file.
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'{1,1}; {} + {1,2}; {} + {1,3}; {}; ({}/3)',
'--dtype', 'uint8',
'tests/data/RGB.byte.tif',
outfile],
catch_exceptions=False)
assert result.exit_code == 0
with rasterio.open(outfile) as src:
assert src.count == 1
assert src.meta['dtype'] == 'uint8'
data = src.read()
assert round(data.mean(), 1) == 60.3
def test_copy_rgb(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'{1}',
'--dtype', 'uint8',
'tests/data/RGB.byte.tif',
outfile],
catch_exceptions=False)
assert result.exit_code == 0
with rasterio.open(outfile) as src:
assert src.count == 3
assert src.meta['dtype'] == 'uint8'
data = src.read()
assert round(data.mean(), 1) == 60.6
def test_copy_rgb_tempval(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'{1}; {}',
'--dtype', 'uint8',
'tests/data/RGB.byte.tif',
outfile],
catch_exceptions=False)
assert result.exit_code == 0
with rasterio.open(outfile) as src:
assert src.count == 3
assert src.meta['dtype'] == 'uint8'
data = src.read()
assert round(data.mean(), 1) == 60.6
def test_copy_rgb_by_name(tmpdir):
outfile = str(tmpdir.join('out.tif'))
runner = CliRunner()
result = runner.invoke(calc, [
'{tests/data/RGB.byte.tif}',
'(ra 1)',
'--dtype', 'uint8',
'tests/data/RGB.byte.tif',
outfile],