From b6e75dacb1d35e345e8a43fb1660bbd18c07f08a Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Wed, 14 Dec 2011 18:40:54 -0800
Subject: [PATCH] function parsing in template string parser (#231)

---
 beets/util/functemplate.py | 135 ++++++++++++++++++++++++++++++-------
 test/test_template.py      |  82 ++++++++++++++++++++--
 2 files changed, 184 insertions(+), 33 deletions(-)

diff --git a/beets/util/functemplate.py b/beets/util/functemplate.py
index aa01ad655..273c2e799 100644
--- a/beets/util/functemplate.py
+++ b/beets/util/functemplate.py
@@ -69,8 +69,8 @@ class Call(object):
         self.original = original
 
     def __repr__(self):
-        return u'Call(%s, %s, %s)' % (repr(self.ident, self.args,
-                                           self.original))
+        return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args),
+                                      repr(self.original))
 
     def evaluate(self, env):
         """Evaluate the function call in the environment, returning a
@@ -81,6 +81,28 @@ class Call(object):
         else:
             return self.original
 
+class Expression(object):
+    """Top-level template construct: contains a list of text blobs,
+    Symbols, and Calls.
+    """
+    def __init__(self, parts):
+        self.parts = parts
+
+    def __repr__(self):
+        return u'Expression(%s)' % (repr(self.parts))
+
+    def evaluate(self, env):
+        """Evaluate the entire expression in the environment, returning
+        a Unicode string.
+        """
+        out = []
+        for part in self.parts:
+            if isinstance(part, basestring):
+                out.append(part)
+            else:
+                out.append(part.evaluate(env))
+        return u''.join(out)
+
 class ParseError(Exception):
     pass
 
@@ -98,7 +120,7 @@ class Parser(object):
         self.pos = 0
         self.parts = []
 
-    def parse_template(self):
+    def parse_expression(self):
         """Parse a template expression starting at ``pos``. Resulting
         components (Unicode strings, Symbols, and Calls) are added to
         the ``parts`` field, a list.  The ``pos`` field is updated to be
@@ -120,11 +142,13 @@ class Parser(object):
                 continue
 
             if self.pos == len(self.string) - 1:
-                # The last character can never begin a structure, so we just
-                # interpret it as a literal character.
-                text_parts.append(char)
-                self.pos += 1
-                continue
+                # The last character can never begin a structure, so we
+                # just interpret it as a literal character (unless it
+                # terminates the expression, as with , and }).
+                if char not in (GROUP_CLOSE, ARG_SEP):
+                    text_parts.append(char)
+                    self.pos += 1
+                break
 
             next_char = self.string[self.pos + 1]
             if char == next_char:
@@ -203,6 +227,77 @@ class Parser(object):
                 # A standalone $.
                 self.parts.append(SYMBOL_DELIM)
 
+    def parse_call(self):
+        """Parse a function call (like ``%foo{bar,baz}``) starting at
+        ``pos``.  Possibly appends a Call object to ``parts`` and updates
+        ``pos``. The character at ``pos`` must be ``%``.
+        """
+        assert self.pos < len(self.string)
+        assert self.string[self.pos] == FUNC_DELIM
+
+        start_pos = self.pos
+        self.pos += 1
+
+        ident = self._parse_ident()
+        if not ident:
+            # No function name.
+            self.parts.append(FUNC_DELIM)
+            return
+        
+        if self.pos >= len(self.string):
+            # Identifier terminates string.
+            self.parts.append(self.string[start_pos:self.pos])
+            return
+
+        if self.string[self.pos] != GROUP_OPEN:
+            # Argument list not opened.
+            self.parts.append(self.string[start_pos:self.pos])
+            return
+
+        # Skip past opening brace and try to parse an argument list.
+        self.pos += 1
+        args = self.parse_argument_list()
+        if self.pos >= len(self.string) or \
+           self.string[self.pos] != GROUP_CLOSE:
+            # Arguments unclosed.
+            self.parts.append(self.string[start_pos:self.pos])
+            return
+
+        self.pos += 1 # Move past closing brace.
+        self.parts.append(Call(ident, args, self.string[start_pos:self.pos]))
+
+    def parse_argument_list(self):
+        """Parse a list of arguments starting at ``pos``, returning a
+        list of Expression objects. Does not modify ``parts``. Should
+        leave ``pos`` pointing to a } character or the end of the
+        string.
+        """
+        # Try to parse a subexpression in a subparser.
+        expressions = []
+
+        while self.pos < len(self.string) and \
+              self.string[self.pos] != GROUP_CLOSE:
+            subparser = Parser(self.string[self.pos:])
+            subparser.parse_expression()
+            if subparser.pos == 0:
+                # No expression could be parsed.
+                break
+
+            # Extract and advance past the parsed expression.
+            expressions.append(Expression(subparser.parts))
+            self.pos += subparser.pos 
+
+            if self.pos >= len(self.string) or \
+               self.string[self.pos] == GROUP_CLOSE:
+                # Argument list terminated by EOF or closing brace.
+                break
+
+            # The only other way to terminate an expression is a comma.
+            assert self.string[self.pos] == ARG_SEP
+            self.pos += 1
+
+        return expressions
+
     def _parse_ident(self):
         """Parse an identifier and return it (possibly an empty string).
         Updates ``pos``.
@@ -213,38 +308,26 @@ class Parser(object):
         return ident
 
 def _parse(template):
-    """Parse a top-level template string expression, returning a list of
-    nodes. Any extraneous text is considered literal text.
+    """Parse a top-level template string Expression. Any extraneous text
+    is considered literal text.
     """
     parser = Parser(template)
-    parser.parse_template()
+    parser.parse_expression()
 
     parts = parser.parts
     remainder = parser.string[parser.pos:]
     if remainder:
         parts.append(remainder)
-    return parts
+    return Expression(parts)
 
 class Template(object):
     """A string template, including text, Symbols, and Calls.
     """
     def __init__(self, template):
-        self.parts = _parse(template)
+        self.expr = _parse(template)
         self.original = template
 
-    def evaluate(self, env):
-        """Evaluate the entire template in the environment, returning a
-        Unicode string.
-        """
-        out = []
-        for part in self.parts:
-            if isinstance(part, basestring):
-                out.append(part)
-            else:
-                out.append(part.evaluate(env))
-        return u''.join(out)
-
     def substitute(self, values={}, functions={}):
         """Evaluate the template given the values and functions.
         """
-        return self.evaluate(Environment(values, functions))
+        return self.expr.evaluate(Environment(values, functions))
diff --git a/test/test_template.py b/test/test_template.py
index 3841d7876..29ee9795d 100644
--- a/test/test_template.py
+++ b/test/test_template.py
@@ -19,13 +19,13 @@ import unittest
 import _common
 from beets.util import functemplate
 
-def _normparse(text):
-    """Parse a template and then normalize the result, collapsing
-    multiple adjacent text blocks and removing empty text blocks.
-    Generates a sequence of parts.
+def _normexpr(expr):
+    """Normalize an Expression object's parts, collapsing multiple
+    adjacent text blocks and removing empty text blocks. Generates a
+    sequence of parts.
     """
     textbuf = []
-    for part in functemplate._parse(text):
+    for part in expr.parts:
         if isinstance(part, basestring):
             textbuf.append(part)
         else:
@@ -40,6 +40,10 @@ def _normparse(text):
         if text:
             yield text
 
+def _normparse(text):
+    """Parse a template and then normalize the resulting Expression."""
+    return _normexpr(functemplate._parse(text))
+
 class ParseTest(unittest.TestCase):
     def test_empty_string(self):
         self.assertEqual(list(_normparse(u'')), [])
@@ -53,6 +57,19 @@ class ParseTest(unittest.TestCase):
                          u"wrong identifier: %s vs. %s" %
                          (repr(obj.ident), repr(ident)))
 
+    def _assert_call(self, obj, ident, numargs):
+        """Assert that an object is a Call with the given identifier and
+        argument count.
+        """
+        self.assertTrue(isinstance(obj, functemplate.Call),
+                        u"not a Call: %s" % repr(obj))
+        self.assertEqual(obj.ident, ident,
+                         u"wrong identifier: %s vs. %s" %
+                         (repr(obj.ident), repr(ident)))
+        self.assertEqual(len(obj.args), numargs,
+                         u"wrong argument count in %s: %i vs. %i" %
+                         (repr(obj.ident), len(obj.args), numargs))
+
     def test_plain_text(self):
         self.assertEqual(list(_normparse(u'hello world')), [u'hello world'])
 
@@ -78,7 +95,7 @@ class ParseTest(unittest.TestCase):
         self.assertEqual(list(_normparse(u'a {{ b')), [u'a { b'])
 
     def test_escaped_close_brace(self):
-        self.assertEqual(list(_normparse(u'a } b')), [u'a } b'])
+        self.assertEqual(list(_normparse(u'a }} b')), [u'a } b'])
 
     def test_bare_value_delim_kept_intact(self):
         self.assertEqual(list(_normparse(u'a $ b')), [u'a $ b'])
@@ -86,6 +103,15 @@ class ParseTest(unittest.TestCase):
     def test_bare_function_delim_kept_intact(self):
         self.assertEqual(list(_normparse(u'a % b')), [u'a % b'])
 
+    def test_bare_opener_kept_intact(self):
+        self.assertEqual(list(_normparse(u'a { b')), [u'a { b'])
+
+    def test_bare_closer_kept_intact(self):
+        self.assertEqual(list(_normparse(u'a } b')), [u'a } b'])
+
+    def test_bare_sep_kept_intact(self):
+        self.assertEqual(list(_normparse(u'a , b')), [u'a , b'])
+
     def test_symbol_alone(self):
         parts = list(_normparse(u'$foo'))
         self.assertEqual(len(parts), 1)
@@ -110,10 +136,52 @@ class ParseTest(unittest.TestCase):
     
     def test_empty_braces_symbol(self):
         self.assertEqual(list(_normparse(u'a ${} b')), [u'a ${} b'])
+
+    def test_call_without_args_at_end(self):
+        self.assertEqual(list(_normparse(u'foo %bar')), [u'foo %bar'])
+    
+    def test_call_without_args(self):
+        self.assertEqual(list(_normparse(u'foo %bar baz')), [u'foo %bar baz'])
+
+    def test_call_with_unclosed_args(self):
+        self.assertEqual(list(_normparse(u'foo %bar{ baz')), [u'foo %bar{ baz'])
+    
+    def test_call_with_unclosed_multiple_args(self):
+        self.assertEqual(list(_normparse(u'foo %bar{bar,bar baz')),
+                         [u'foo %bar{bar,bar baz'])
+
+    def test_call_no_args(self):
+        parts = list(_normparse(u'%foo{}'))
+        self.assertEqual(len(parts), 1)
+        self._assert_call(parts[0], u"foo", 0)
+    
+    def test_call_single_arg(self):
+        parts = list(_normparse(u'%foo{bar}'))
+        self.assertEqual(len(parts), 1)
+        self._assert_call(parts[0], u"foo", 1)
+        self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar'])
+    
+    def test_call_two_args(self):
+        parts = list(_normparse(u'%foo{bar,baz}'))
+        self.assertEqual(len(parts), 1)
+        self._assert_call(parts[0], u"foo", 2)
+        self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar'])
+        self.assertEqual(list(_normexpr(parts[0].args[1])), [u'baz'])
+    
+    def test_call_with_escaped_sep(self):
+        parts = list(_normparse(u'%foo{bar,,baz}'))
+        self.assertEqual(len(parts), 1)
+        self._assert_call(parts[0], u"foo", 1)
+        self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar,baz'])
+    
+    def test_call_with_escaped_close(self):
+        parts = list(_normparse(u'%foo{bar}}baz}'))
+        self.assertEqual(len(parts), 1)
+        self._assert_call(parts[0], u"foo", 1)
+        self.assertEqual(list(_normexpr(parts[0].args[0])), [u'bar}baz'])
     
 def suite():
     return unittest.TestLoader().loadTestsFromName(__name__)
 
 if __name__ == '__main__':
     unittest.main(defaultTest='suite')
-