function parsing in template string parser (#231)

This commit is contained in:
Adrian Sampson 2011-12-14 18:40:54 -08:00
parent b5a76e9d1e
commit b6e75dacb1
2 changed files with 184 additions and 33 deletions

View file

@ -69,8 +69,8 @@ class Call(object):
self.original = original self.original = original
def __repr__(self): def __repr__(self):
return u'Call(%s, %s, %s)' % (repr(self.ident, self.args, return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args),
self.original)) repr(self.original))
def evaluate(self, env): def evaluate(self, env):
"""Evaluate the function call in the environment, returning a """Evaluate the function call in the environment, returning a
@ -81,6 +81,28 @@ class Call(object):
else: else:
return self.original return self.original
class Expression(object):
    """Root node of a parsed template: an ordered list of parts, each
    of which is either a plain text string or a node (a Symbol or a
    Call) that knows how to evaluate itself.
    """
    def __init__(self, parts):
        self.parts = parts

    def __repr__(self):
        return u'Expression(%s)' % repr(self.parts)

    def evaluate(self, env):
        """Render every part against ``env`` and concatenate the
        results into a single Unicode string. Text parts are copied
        through verbatim; any other part is asked to evaluate itself.
        """
        return u''.join(
            piece if isinstance(piece, basestring) else piece.evaluate(env)
            for piece in self.parts
        )
class ParseError(Exception): class ParseError(Exception):
pass pass
@ -98,7 +120,7 @@ class Parser(object):
self.pos = 0 self.pos = 0
self.parts = [] self.parts = []
def parse_template(self): def parse_expression(self):
"""Parse a template expression starting at ``pos``. Resulting """Parse a template expression starting at ``pos``. Resulting
components (Unicode strings, Symbols, and Calls) are added to components (Unicode strings, Symbols, and Calls) are added to
the ``parts`` field, a list. The ``pos`` field is updated to be the ``parts`` field, a list. The ``pos`` field is updated to be
@ -120,11 +142,13 @@ class Parser(object):
continue continue
if self.pos == len(self.string) - 1: if self.pos == len(self.string) - 1:
# The last character can never begin a structure, so we just # The last character can never begin a structure, so we
# interpret it as a literal character. # just interpret it as a literal character (unless it
# terminates the expression, as with , and }).
if char not in (GROUP_CLOSE, ARG_SEP):
text_parts.append(char) text_parts.append(char)
self.pos += 1 self.pos += 1
continue break
next_char = self.string[self.pos + 1] next_char = self.string[self.pos + 1]
if char == next_char: if char == next_char:
@ -203,6 +227,77 @@ class Parser(object):
# A standalone $. # A standalone $.
self.parts.append(SYMBOL_DELIM) self.parts.append(SYMBOL_DELIM)
def parse_call(self):
    """Try to read a function call (for example ``%foo{bar,baz}``)
    whose ``%`` character sits at ``pos``.

    When a complete call is found, a Call object is appended to
    ``parts``. When no function name follows the ``%``, only the
    delimiter itself is appended. When the name is not followed by a
    brace-delimited argument list that is properly closed, the text
    consumed so far is appended as a literal string. ``pos`` is left
    just past whatever was consumed.
    """
    assert self.pos < len(self.string)
    assert self.string[self.pos] == FUNC_DELIM

    begin = self.pos
    self.pos += 1  # Step over the % delimiter.

    name = self._parse_ident()
    if not name:
        # Nothing usable as a function name follows the delimiter.
        self.parts.append(FUNC_DELIM)
        return

    # The name has to be followed directly by an opening brace; if the
    # string ends here or some other character follows, what has been
    # read so far is kept as literal text.
    if self.pos >= len(self.string) or \
            self.string[self.pos] != GROUP_OPEN:
        self.parts.append(self.string[begin:self.pos])
        return

    # Consume the opening brace, then the arguments themselves.
    self.pos += 1
    arguments = self.parse_argument_list()

    closed = self.pos < len(self.string) and \
        self.string[self.pos] == GROUP_CLOSE
    if not closed:
        # The argument list never reached its closing brace.
        self.parts.append(self.string[begin:self.pos])
        return

    self.pos += 1  # Consume the closing brace.
    source_text = self.string[begin:self.pos]
    self.parts.append(Call(name, arguments, source_text))
def parse_argument_list(self):
    """Read a separator-delimited run of argument expressions
    beginning at ``pos`` and return them as a list of Expression
    objects. ``parts`` is not touched. On return ``pos`` is expected
    to rest on a closing brace or at the end of the string.
    """
    def finished():
        # True once the input is exhausted or a closing brace is next.
        return self.pos >= len(self.string) or \
            self.string[self.pos] == GROUP_CLOSE

    args = []
    while not finished():
        # Each argument is parsed by a fresh parser over the rest of
        # the input, so its ``pos`` counts characters from here.
        sub = Parser(self.string[self.pos:])
        sub.parse_expression()
        consumed = sub.pos
        if consumed == 0:
            # The subparser made no progress; give up on the list.
            break

        # Record the argument and skip over the text it covered.
        args.append(Expression(sub.parts))
        self.pos += consumed

        if finished():
            # End of input or closing brace: the list is complete.
            break

        # Otherwise the expression must have stopped at a separator.
        assert self.string[self.pos] == ARG_SEP
        self.pos += 1
    return args
def _parse_ident(self): def _parse_ident(self):
"""Parse an identifier and return it (possibly an empty string). """Parse an identifier and return it (possibly an empty string).
Updates ``pos``. Updates ``pos``.
@ -213,38 +308,26 @@ class Parser(object):
return ident return ident
def _parse(template): def _parse(template):
"""Parse a top-level template string expression, returning a list of """Parse a top-level template string Expression. Any extraneous text
nodes. Any extraneous text is considered literal text. is considered literal text.
""" """
parser = Parser(template) parser = Parser(template)
parser.parse_template() parser.parse_expression()
parts = parser.parts parts = parser.parts
remainder = parser.string[parser.pos:] remainder = parser.string[parser.pos:]
if remainder: if remainder:
parts.append(remainder) parts.append(remainder)
return parts return Expression(parts)
class Template(object): class Template(object):
"""A string template, including text, Symbols, and Calls. """A string template, including text, Symbols, and Calls.
""" """
def __init__(self, template): def __init__(self, template):
self.parts = _parse(template) self.expr = _parse(template)
self.original = template self.original = template
def evaluate(self, env):
"""Evaluate the entire template in the environment, returning a
Unicode string.
"""
out = []
for part in self.parts:
if isinstance(part, basestring):
out.append(part)
else:
out.append(part.evaluate(env))
return u''.join(out)
def substitute(self, values={}, functions={}): def substitute(self, values={}, functions={}):
"""Evaluate the template given the values and functions. """Evaluate the template given the values and functions.
""" """
return self.evaluate(Environment(values, functions)) return self.expr.evaluate(Environment(values, functions))

View file

@ -19,13 +19,13 @@ import unittest
import _common import _common
from beets.util import functemplate from beets.util import functemplate
def _normparse(text): def _normexpr(expr):
"""Parse a template and then normalize the result, collapsing """Normalize an Expression object's parts, collapsing multiple
multiple adjacent text blocks and removing empty text blocks. adjacent text blocks and removing empty text blocks. Generates a
Generates a sequence of parts. sequence of parts.
""" """
textbuf = [] textbuf = []
for part in functemplate._parse(text): for part in expr.parts:
if isinstance(part, basestring): if isinstance(part, basestring):
textbuf.append(part) textbuf.append(part)
else: else:
@ -40,6 +40,10 @@ def _normparse(text):
if text: if text:
yield text yield text
def _normparse(text):
    """Parse ``text`` as a template and return the normalized parts of
    the resulting Expression.
    """
    expr = functemplate._parse(text)
    return _normexpr(expr)
class ParseTest(unittest.TestCase): class ParseTest(unittest.TestCase):
def test_empty_string(self): def test_empty_string(self):
self.assertEqual(list(_normparse(u'')), []) self.assertEqual(list(_normparse(u'')), [])
@ -53,6 +57,19 @@ class ParseTest(unittest.TestCase):
u"wrong identifier: %s vs. %s" % u"wrong identifier: %s vs. %s" %
(repr(obj.ident), repr(ident))) (repr(obj.ident), repr(ident)))
def _assert_call(self, obj, ident, numargs):
    """Check that ``obj`` is a Call whose identifier is ``ident`` and
    which carries exactly ``numargs`` arguments.
    """
    type_msg = u"not a Call: %s" % repr(obj)
    self.assertTrue(isinstance(obj, functemplate.Call), type_msg)

    # Attribute access happens only after the type check has passed.
    ident_msg = u"wrong identifier: %s vs. %s" % \
        (repr(obj.ident), repr(ident))
    self.assertEqual(obj.ident, ident, ident_msg)

    actual = len(obj.args)
    count_msg = u"wrong argument count in %s: %i vs. %i" % \
        (repr(obj.ident), actual, numargs)
    self.assertEqual(actual, numargs, count_msg)
def test_plain_text(self): def test_plain_text(self):
self.assertEqual(list(_normparse(u'hello world')), [u'hello world']) self.assertEqual(list(_normparse(u'hello world')), [u'hello world'])
@ -78,7 +95,7 @@ class ParseTest(unittest.TestCase):
self.assertEqual(list(_normparse(u'a {{ b')), [u'a { b']) self.assertEqual(list(_normparse(u'a {{ b')), [u'a { b'])
def test_escaped_close_brace(self): def test_escaped_close_brace(self):
self.assertEqual(list(_normparse(u'a } b')), [u'a } b']) self.assertEqual(list(_normparse(u'a }} b')), [u'a } b'])
def test_bare_value_delim_kept_intact(self): def test_bare_value_delim_kept_intact(self):
self.assertEqual(list(_normparse(u'a $ b')), [u'a $ b']) self.assertEqual(list(_normparse(u'a $ b')), [u'a $ b'])
@ -86,6 +103,15 @@ class ParseTest(unittest.TestCase):
def test_bare_function_delim_kept_intact(self): def test_bare_function_delim_kept_intact(self):
self.assertEqual(list(_normparse(u'a % b')), [u'a % b']) self.assertEqual(list(_normparse(u'a % b')), [u'a % b'])
def test_bare_opener_kept_intact(self):
    # A lone opening brace outside of any call stays literal text.
    parsed = list(_normparse(u'a { b'))
    self.assertEqual(parsed, [u'a { b'])
def test_bare_closer_kept_intact(self):
    # A lone closing brace outside of any call stays literal text.
    parsed = list(_normparse(u'a } b'))
    self.assertEqual(parsed, [u'a } b'])
def test_bare_sep_kept_intact(self):
    # A lone argument separator outside of any call stays literal text.
    parsed = list(_normparse(u'a , b'))
    self.assertEqual(parsed, [u'a , b'])
def test_symbol_alone(self): def test_symbol_alone(self):
parts = list(_normparse(u'$foo')) parts = list(_normparse(u'$foo'))
self.assertEqual(len(parts), 1) self.assertEqual(len(parts), 1)
@ -111,9 +137,51 @@ class ParseTest(unittest.TestCase):
def test_empty_braces_symbol(self): def test_empty_braces_symbol(self):
self.assertEqual(list(_normparse(u'a ${} b')), [u'a ${} b']) self.assertEqual(list(_normparse(u'a ${} b')), [u'a ${} b'])
def test_call_without_args_at_end(self):
    # A function name that ends the string without braces is plain text.
    parsed = list(_normparse(u'foo %bar'))
    self.assertEqual(parsed, [u'foo %bar'])
def test_call_without_args(self):
    # A function name not followed by an opening brace is plain text.
    parsed = list(_normparse(u'foo %bar baz'))
    self.assertEqual(parsed, [u'foo %bar baz'])
def test_call_with_unclosed_args(self):
    # An argument list that is never closed leaves the text literal.
    parsed = list(_normparse(u'foo %bar{ baz'))
    self.assertEqual(parsed, [u'foo %bar{ baz'])
def test_call_with_unclosed_multiple_args(self):
    # Several arguments but no closing brace: everything stays literal.
    template = u'foo %bar{bar,bar baz'
    parsed = list(_normparse(template))
    self.assertEqual(parsed, [u'foo %bar{bar,bar baz'])
def test_call_no_args(self):
    # Empty braces produce a single Call with zero arguments.
    nodes = list(_normparse(u'%foo{}'))
    self.assertEqual(len(nodes), 1)
    call = nodes[0]
    self._assert_call(call, u"foo", 0)
def test_call_single_arg(self):
    # One plain-text argument produces a Call with a single argument.
    nodes = list(_normparse(u'%foo{bar}'))
    self.assertEqual(len(nodes), 1)
    call = nodes[0]
    self._assert_call(call, u"foo", 1)
    first = list(_normexpr(call.args[0]))
    self.assertEqual(first, [u'bar'])
def test_call_two_args(self):
    # A separator splits the argument list into two expressions.
    nodes = list(_normparse(u'%foo{bar,baz}'))
    self.assertEqual(len(nodes), 1)
    call = nodes[0]
    self._assert_call(call, u"foo", 2)
    first = list(_normexpr(call.args[0]))
    self.assertEqual(first, [u'bar'])
    second = list(_normexpr(call.args[1]))
    self.assertEqual(second, [u'baz'])
def test_call_with_escaped_sep(self):
    # A doubled separator is an escape and does not split the argument.
    nodes = list(_normparse(u'%foo{bar,,baz}'))
    self.assertEqual(len(nodes), 1)
    call = nodes[0]
    self._assert_call(call, u"foo", 1)
    only = list(_normexpr(call.args[0]))
    self.assertEqual(only, [u'bar,baz'])
def test_call_with_escaped_close(self):
    # A doubled closing brace is an escape and does not end the call.
    nodes = list(_normparse(u'%foo{bar}}baz}'))
    self.assertEqual(len(nodes), 1)
    call = nodes[0]
    self._assert_call(call, u"foo", 1)
    only = list(_normexpr(call.args[0]))
    self.assertEqual(only, [u'bar}baz'])
def suite(): def suite():
return unittest.TestLoader().loadTestsFromName(__name__) return unittest.TestLoader().loadTestsFromName(__name__)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main(defaultTest='suite') unittest.main(defaultTest='suite')