From b6e75dacb1d35e345e8a43fb1660bbd18c07f08a Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Wed, 14 Dec 2011 18:40:54 -0800 Subject: [PATCH] function parsing in template string parser (#231) --- beets/util/functemplate.py | 135 ++++++++++++++++++++++++++++++------- test/test_template.py | 82 ++++++++++++++++++++-- 2 files changed, 184 insertions(+), 33 deletions(-) diff --git a/beets/util/functemplate.py b/beets/util/functemplate.py index aa01ad655..273c2e799 100644 --- a/beets/util/functemplate.py +++ b/beets/util/functemplate.py @@ -69,8 +69,8 @@ class Call(object): self.original = original def __repr__(self): - return u'Call(%s, %s, %s)' % (repr(self.ident, self.args, - self.original)) + return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args), + repr(self.original)) def evaluate(self, env): """Evaluate the function call in the environment, returning a @@ -81,6 +81,28 @@ class Call(object): else: return self.original +class Expression(object): + """Top-level template construct: contains a list of text blobs, + Symbols, and Calls. + """ + def __init__(self, parts): + self.parts = parts + + def __repr__(self): + return u'Expression(%s)' % (repr(self.parts)) + + def evaluate(self, env): + """Evaluate the entire expression in the environment, returning + a Unicode string. + """ + out = [] + for part in self.parts: + if isinstance(part, basestring): + out.append(part) + else: + out.append(part.evaluate(env)) + return u''.join(out) + class ParseError(Exception): pass @@ -98,7 +120,7 @@ class Parser(object): self.pos = 0 self.parts = [] - def parse_template(self): + def parse_expression(self): """Parse a template expression starting at ``pos``. Resulting components (Unicode strings, Symbols, and Calls) are added to the ``parts`` field, a list. The ``pos`` field is updated to be @@ -120,11 +142,13 @@ class Parser(object): continue if self.pos == len(self.string) - 1: - # The last character can never begin a structure, so we just - # interpret it as a literal character. - text_parts.append(char) - self.pos += 1 - continue + # The last character can never begin a structure, so we + # just interpret it as a literal character (unless it + # terminates the expression, as with , and }). + if char not in (GROUP_CLOSE, ARG_SEP): + text_parts.append(char) + self.pos += 1 + break next_char = self.string[self.pos + 1] if char == next_char: @@ -203,6 +227,77 @@ class Parser(object): # A standalone $. self.parts.append(SYMBOL_DELIM) + def parse_call(self): + """Parse a function call (like ``%foo{bar,baz}``) starting at + ``pos``. Possibly appends a Call object to ``parts`` and update + ``pos``. The character at ``pos`` must be ``%``. + """ + assert self.pos < len(self.string) + assert self.string[self.pos] == FUNC_DELIM + + start_pos = self.pos + self.pos += 1 + + ident = self._parse_ident() + if not ident: + # No function name. + self.parts.append(FUNC_DELIM) + return + + if self.pos >= len(self.string): + # Identifier terminates string. + self.parts.append(self.string[start_pos:self.pos]) + return + + if self.string[self.pos] != GROUP_OPEN: + # Argument list not opened. + self.parts.append(self.string[start_pos:self.pos]) + return + + # Skip past opening brace and try to parse an argument list. + self.pos += 1 + args = self.parse_argument_list() + if self.pos >= len(self.string) or \ + self.string[self.pos] != GROUP_CLOSE: + # Arguments unclosed. + self.parts.append(self.string[start_pos:self.pos]) + return + + self.pos += 1 # Move past closing brace. + self.parts.append(Call(ident, args, self.string[start_pos:self.pos])) + + def parse_argument_list(self): + """Parse a list of arguments starting at ``pos``, returning a + list of Expression objects. Does not modify ``parts``. Should + leave ``pos`` pointing to a } character or the end of the + string. + """ + # Try to parse a subexpression in a subparser. + expressions = [] + + while self.pos < len(self.string) and \ + self.string[self.pos] != GROUP_CLOSE: + subparser = Parser(self.string[self.pos:]) + subparser.parse_expression() + if subparser.pos == 0: + # No expression could be parsed. + break + + # Extract and advance past the parsed expression. + expressions.append(Expression(subparser.parts)) + self.pos += subparser.pos + + if self.pos >= len(self.string) or \ + self.string[self.pos] == GROUP_CLOSE: + # Argument list terminated by EOF or closing brace. + break + + # Only other way to terminate an expression is with ,. + assert self.string[self.pos] == ARG_SEP + self.pos += 1 + + return expressions + def _parse_ident(self): """Parse an identifier and return it (possibly an empty string). Updates ``pos``. @@ -213,38 +308,26 @@ class Parser(object): return ident def _parse(template): - """Parse a top-level template string expression, returning a list of - nodes. Any extraneous text is considered literal text. + """Parse a top-level template string Expression. Any extraneous text + is considered literal text. """ parser = Parser(template) - parser.parse_template() + parser.parse_expression() parts = parser.parts remainder = parser.string[parser.pos:] if remainder: parts.append(remainder) - return parts + return Expression(parts) class Template(object): """A string template, including text, Symbols, and Calls. """ def __init__(self, template): - self.parts = _parse(template) + self.expr = _parse(template) self.original = template - def evaluate(self, env): - """Evaluate the entire template in the environment, returning a - Unicode string. - """ - out = [] - for part in self.parts: - if isinstance(part, basestring): - out.append(part) - else: - out.append(part.evaluate(env)) - return u''.join(out) - def substitute(self, values={}, functions={}): """Evaluate the template given the values and functions. """ - return self.evaluate(Environment(values, functions)) + return self.expr.evaluate(Environment(values, functions)) diff --git a/test/test_template.py b/test/test_template.py index 3841d7876..29ee9795d 100644 --- a/test/test_template.py +++ b/test/test_template.py @@ -19,13 +19,13 @@ import unittest import _common from beets.util import functemplate -def _normparse(text): - """Parse a template and then normalize the result, collapsing - multiple adjacent text blocks and removing empty text blocks. - Generates a sequence of parts. +def _normexpr(expr): + """Normalize an Expression object's parts, collapsing multiple + adjacent text blocks and removing empty text blocks. Generates a + sequence of parts. """ textbuf = [] - for part in functemplate._parse(text): + for part in expr.parts: if isinstance(part, basestring): textbuf.append(part) else: @@ -40,6 +40,10 @@ def _normparse(text): if text: yield text +def _normparse(text): + """Parse a template and then normalize the resulting Expression.""" + return _normexpr(functemplate._parse(text)) + class ParseTest(unittest.TestCase): def test_empty_string(self): self.assertEqual(list(_normparse(u'')), []) @@ -53,6 +57,19 @@ class ParseTest(unittest.TestCase): u"wrong identifier: %s vs. %s" % (repr(obj.ident), repr(ident))) + def _assert_call(self, obj, ident, numargs): + """Assert that an object is a Call with the given identifier and + argument count. + """ + self.assertTrue(isinstance(obj, functemplate.Call), + u"not a Call: %s" % repr(obj)) + self.assertEqual(obj.ident, ident, + u"wrong identifier: %s vs. %s" % + (repr(obj.ident), repr(ident))) + self.assertEqual(len(obj.args), numargs, + u"wrong argument count in %s: %i vs. %i" % + (repr(obj.ident), len(obj.args), numargs)) + def test_plain_text(self): self.assertEqual(list(_normparse(u'hello world')), [u'hello world']) @@ -78,7 +95,7 @@ class ParseTest(unittest.TestCase): self.assertEqual(list(_normparse(u'a {{ b')), [u'a { b']) def test_escaped_close_brace(self): - self.assertEqual(list(_normparse(u'a } b')), [u'a } b']) + self.assertEqual(list(_normparse(u'a }} b')), [u'a } b']) def test_bare_value_delim_kept_intact(self): self.assertEqual(list(_normparse(u'a $ b')), [u'a $ b']) @@ -86,6 +103,15 @@ class ParseTest(unittest.TestCase): def test_bare_function_delim_kept_intact(self): self.assertEqual(list(_normparse(u'a % b')), [u'a % b']) + def test_bare_opener_kept_intact(self): + self.assertEqual(list(_normparse(u'a { b')), [u'a { b']) + + def test_bare_closer_kept_intact(self): + self.assertEqual(list(_normparse(u'a } b')), [u'a } b']) + + def test_bare_sep_kept_intact(self): + self.assertEqual(list(_normparse(u'a , b')), [u'a , b']) + def test_symbol_alone(self): parts = list(_normparse(u'$foo')) self.assertEqual(len(parts), 1) @@ -110,10 +136,52 @@ class ParseTest(unittest.TestCase): def test_empty_braces_symbol(self): self.assertEqual(list(_normparse(u'a ${} b')), [u'a ${} b']) + + def test_call_without_args_at_end(self): + self.assertEqual(list(_normparse(u'foo %bar')), [u'foo %bar']) + + def test_call_without_args(self): + self.assertEqual(list(_normparse(u'foo %bar baz')), [u'foo %bar baz']) + + def test_call_with_unclosed_args(self): + self.assertEqual(list(_normparse(u'foo %bar{ baz')), [u'foo %bar{ baz']) + + def test_call_with_unclosed_multiple_args(self): + self.assertEqual(list(_normparse(u'foo %bar{bar,bar baz')), + [u'foo %bar{bar,bar baz']) + + def test_call_no_args(self): + parts = list(_normparse(u'%foo{}')) + self.assertEqual(len(parts), 1) + self._assert_call(parts[0], u"foo", 0) + + def test_call_single_arg(self): + parts = list(_normparse(u'%foo{bar}')) + self.assertEqual(len(parts), 1) + self._assert_call(parts[0], u"foo", 1) + self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar']) + + def test_call_two_args(self): + parts = list(_normparse(u'%foo{bar,baz}')) + self.assertEqual(len(parts), 1) + self._assert_call(parts[0], u"foo", 2) + self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar']) + self.assertEqual(list(_normexpr(parts[0].args[1])), [u'baz']) + + def test_call_with_escaped_sep(self): + parts = list(_normparse(u'%foo{bar,,baz}')) + self.assertEqual(len(parts), 1) + self._assert_call(parts[0], u"foo", 1) + self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar,baz']) + + def test_call_with_escaped_close(self): + parts = list(_normparse(u'%foo{bar}}baz}')) + self.assertEqual(len(parts), 1) + self._assert_call(parts[0], u"foo", 1) + self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar}baz']) def suite(): return unittest.TestLoader().loadTestsFromName(__name__) if __name__ == '__main__': unittest.main(defaultTest='suite') -