From 518c6b8801dc349a4b8fff6dd52c8a4a52a716f9 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Sun, 2 Oct 2016 19:17:56 +0200 Subject: [PATCH 1/7] Add tests for comma outside functions in templates Add unit tests for the use of the separator special character (comma) outside a function argument. --- test/test_template.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/test_template.py b/test/test_template.py index 331de0458..67d5d97fe 100644 --- a/test/test_template.py +++ b/test/test_template.py @@ -211,6 +211,22 @@ class ParseTest(unittest.TestCase): self._assert_call(arg_parts[0], u"bar", 1) self.assertEqual(list(_normexpr(arg_parts[0].args[0])), [u'baz']) + def test_sep_before_call_two_args(self): + parts = list(_normparse(u'hello, %foo{bar,baz}')) + self.assertEqual(len(parts), 2) + self.assertEqual(parts[0], u'hello, ') + self._assert_call(parts[1], u"foo", 2) + self.assertEqual(list(_normexpr(parts[1].args[0])), [u'bar']) + self.assertEqual(list(_normexpr(parts[1].args[1])), [u'baz']) + + def test_sep_with_symbols(self): + parts = list(_normparse(u'hello,$foo,$bar')) + self.assertEqual(len(parts), 4) + self.assertEqual(parts[0], u'hello,') + self._assert_symbol(parts[1], u"foo") + self.assertEqual(parts[2], u',') + self._assert_symbol(parts[3], u"bar") + class EvalTest(unittest.TestCase): def _eval(self, template): From 657a711f17b34c0ea2e980445912003ba1d61721 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Sun, 2 Oct 2016 19:36:32 +0200 Subject: [PATCH 2/7] Remove comma from special chars, class variables Remove ARG_SEP from Parser.special_chars, and promote some groups of characters used in parse_expression to class variables. ARG_SEP is still considered an "escapable" character, pending a decision on whether both escaped ('$,') and unescaped (',') syntax would be allowed. 
--- beets/util/functemplate.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/beets/util/functemplate.py b/beets/util/functemplate.py index 7a309e793..05d412508 100644 --- a/beets/util/functemplate.py +++ b/beets/util/functemplate.py @@ -318,9 +318,11 @@ class Parser(object): # Common parsing resources. special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, - ARG_SEP, ESCAPE_CHAR) + ESCAPE_CHAR) special_char_re = re.compile(r'[%s]|$' % u''.join(re.escape(c) for c in special_chars)) + escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) + terminator_chars = (GROUP_CLOSE) def parse_expression(self): """Parse a template expression starting at ``pos``. Resulting @@ -348,14 +350,13 @@ class Parser(object): # The last character can never begin a structure, so we # just interpret it as a literal character (unless it # terminates the expression, as with , and }). - if char not in (GROUP_CLOSE, ARG_SEP): + if char not in self.terminator_chars: text_parts.append(char) self.pos += 1 break next_char = self.string[self.pos + 1] - if char == ESCAPE_CHAR and next_char in \ - (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): + if char == ESCAPE_CHAR and next_char in self.escapable_chars: # An escaped special character ($$, $}, etc.). Note that # ${ is not an escape sequence: this is ambiguous with # the start of a symbol and it's not necessary (just @@ -375,7 +376,7 @@ class Parser(object): elif char == FUNC_DELIM: # Parse a function call. self.parse_call() - elif char in (GROUP_CLOSE, ARG_SEP): + elif char in self.terminator_chars: # Template terminated. break elif char == GROUP_OPEN: From 3e8200703674c2a518d4186a4c77287333fc0a14 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Sun, 2 Oct 2016 19:44:13 +0200 Subject: [PATCH 3/7] Add ArgumentsParser, taking into account commas Add ArgumentsParser as a subclass of Parser that considers ARG_SEP a special character (ie. 
always needs escaping, terminates a block); and use it for parsing the substring that contains the list of arguments at parse_argument_list(). --- beets/util/functemplate.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/beets/util/functemplate.py b/beets/util/functemplate.py index 05d412508..f6b5b3c25 100644 --- a/beets/util/functemplate.py +++ b/beets/util/functemplate.py @@ -484,7 +484,7 @@ class Parser(object): expressions = [] while self.pos < len(self.string): - subparser = Parser(self.string[self.pos:]) + subparser = ArgumentsParser(self.string[self.pos:]) subparser.parse_expression() # Extract and advance past the parsed expression. @@ -513,6 +513,18 @@ class Parser(object): return ident +class ArgumentsParser(Parser): + """``Parser`` that considers ``ARG_SEP`` to be a special character. + """ + # Common parsing resources. + special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, + ARG_SEP, ESCAPE_CHAR) + special_char_re = re.compile(r'[%s]|$' % + u''.join(re.escape(c) for c in special_chars)) + escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) + terminator_chars = (GROUP_CLOSE, ARG_SEP) + + def _parse(template): """Parse a top-level template string Expression. Any extraneous text is considered literal text. From c5da6296c40146a4db9d158496353165127437f9 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Mon, 3 Oct 2016 23:01:50 +0200 Subject: [PATCH 4/7] Use flag instead of subclass for comma in Parser Use an `in_argument` flag on Parser constructor for specifying if the parser should treat commas as a special character, including the logic in parse_expression. 
--- beets/util/functemplate.py | 45 +++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/beets/util/functemplate.py b/beets/util/functemplate.py index f6b5b3c25..51716552c 100644 --- a/beets/util/functemplate.py +++ b/beets/util/functemplate.py @@ -311,8 +311,14 @@ class Parser(object): replaced with a real, accepted parsing technique (PEG, parser generator, etc.). """ - def __init__(self, string): + def __init__(self, string, in_argument=False): + """ Create a new parser. + :param in_argument: boolean that indicates the parser is to be + used for parsing function arguments, ie. considering commas + (`ARG_SEP`) a special character + """ self.string = string + self.in_argument = in_argument self.pos = 0 self.parts = [] @@ -322,7 +328,7 @@ class Parser(object): special_char_re = re.compile(r'[%s]|$' % u''.join(re.escape(c) for c in special_chars)) escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) - terminator_chars = (GROUP_CLOSE) + terminator_chars = (GROUP_CLOSE,) def parse_expression(self): """Parse a template expression starting at ``pos``. Resulting @@ -330,16 +336,26 @@ class Parser(object): the ``parts`` field, a list. The ``pos`` field is updated to be the next character after the expression. """ + # Append comma (ARG_SEP) to the list of special characters only when + # parsing function arguments. + extra_special_chars = () + special_char_re = self.special_char_re + if self.in_argument: + extra_special_chars = (ARG_SEP,) + special_char_re = re.compile( + r'[%s]|$' % u''.join(re.escape(c) for c in + self.special_chars + extra_special_chars)) + text_parts = [] while self.pos < len(self.string): char = self.string[self.pos] - if char not in self.special_chars: + if char not in self.special_chars + extra_special_chars: # A non-special character. Skip to the next special # character, treating the interstice as literal text. 
next_pos = ( - self.special_char_re.search( + special_char_re.search( self.string[self.pos:]).start() + self.pos ) text_parts.append(self.string[self.pos:next_pos]) @@ -350,13 +366,14 @@ class Parser(object): # The last character can never begin a structure, so we # just interpret it as a literal character (unless it # terminates the expression, as with , and }). - if char not in self.terminator_chars: + if char not in self.terminator_chars + extra_special_chars: text_parts.append(char) self.pos += 1 break next_char = self.string[self.pos + 1] - if char == ESCAPE_CHAR and next_char in self.escapable_chars: + if char == ESCAPE_CHAR and next_char in (self.escapable_chars + + extra_special_chars): # An escaped special character ($$, $}, etc.). Note that # ${ is not an escape sequence: this is ambiguous with # the start of a symbol and it's not necessary (just @@ -376,7 +393,7 @@ class Parser(object): elif char == FUNC_DELIM: # Parse a function call. self.parse_call() - elif char in self.terminator_chars: + elif char in self.terminator_chars + extra_special_chars: # Template terminated. break elif char == GROUP_OPEN: @@ -484,7 +501,7 @@ class Parser(object): expressions = [] while self.pos < len(self.string): - subparser = ArgumentsParser(self.string[self.pos:]) + subparser = Parser(self.string[self.pos:], in_argument=True) subparser.parse_expression() # Extract and advance past the parsed expression. @@ -513,18 +530,6 @@ class Parser(object): return ident -class ArgumentsParser(Parser): - """``Parser`` that considers ``ARG_SEP`` to be a special character. - """ - # Common parsing resources. - special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, - ARG_SEP, ESCAPE_CHAR) - special_char_re = re.compile(r'[%s]|$' % - u''.join(re.escape(c) for c in special_chars)) - escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) - terminator_chars = (GROUP_CLOSE, ARG_SEP) - - def _parse(template): """Parse a top-level template string Expression. 
Any extraneous text is considered literal text. From 550206adb55bf312fc1863ac003b222caa5a4745 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Mon, 3 Oct 2016 23:31:47 +0200 Subject: [PATCH 5/7] Add changelog for unescaped commas in Parser --- docs/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 699a3c500..416568799 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -45,6 +45,8 @@ The are a couple of small new features: when a song can be found on AcousticBrainz, this is faster and more automatic than using the :doc:`/plugins/bpm`. * ``beet --version`` now includes the python version used to run beets. +* :doc:`/reference/pathformat` can now include unescaped commas (``,``) when + they are not part of a function call. :bug:`2166` :bug:`2213` And there are a few bug fixes too: From 0eb03534533143f5c6c6436a241317b891c7c209 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Tue, 4 Oct 2016 16:17:26 +0200 Subject: [PATCH 6/7] Revise documentation for commas in Parser --- docs/reference/pathformat.rst | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/reference/pathformat.rst b/docs/reference/pathformat.rst index b5d754bd4..240534a9a 100644 --- a/docs/reference/pathformat.rst +++ b/docs/reference/pathformat.rst @@ -142,11 +142,17 @@ Syntax Details The characters ``$``, ``%``, ``{``, ``}``, and ``,`` are "special" in the path template syntax. This means that, for example, if you want a ``%`` character to appear in your paths, you'll need to be careful that you don't accidentally -write a function call. To escape any of these characters (except ``{``), prefix -it with a ``$``. For example, ``$$`` becomes ``$``; ``$%`` becomes ``%``, etc. -The only exception is ``${``, which is ambiguous with the variable reference -syntax (like ``${title}``). To insert a ``{`` alone, it's always sufficient to -just type ``{``. +write a function call. 
To escape any of these characters (except ``{``, and +``,`` outside a function argument), prefix it with a ``$``. For example, +``$$`` becomes ``$``; ``$%`` becomes ``%``, etc. The only exceptions are: + +* ``${``, which is ambiguous with the variable reference syntax (like + ``${title}``). To insert a ``{`` alone, it's always sufficient to just type + ``{``. +* commas are used as argument separators in function calls. Inside of a + function's argument, use ``$,`` to get a literal ``,`` character. Outside of + any function argument, escaping is not necessary: ``,`` by itself will + produce ``,`` in the output. If a value or function is undefined, the syntax is simply left unreplaced. For example, if you write ``$foo`` in a path template, this will yield ``$foo`` in From f0a14bfdd530e1ddc8cc67316f458de6996c8a55 Mon Sep 17 00:00:00 2001 From: Diego Moreda Date: Tue, 4 Oct 2016 16:20:52 +0200 Subject: [PATCH 7/7] Fix documentation issues for %first Fix a formatting problem related to sphinx not allowing spaces at the beginning or end of an inline literal, and removed an extra sentence at the end of the %first template function documentation. --- docs/reference/pathformat.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/pathformat.rst b/docs/reference/pathformat.rst index 240534a9a..72453a6e5 100644 --- a/docs/reference/pathformat.rst +++ b/docs/reference/pathformat.rst @@ -76,12 +76,12 @@ These functions are built in to beets: * ``%time{date_time,format}``: Return the date and time in any format accepted by `strftime`_. For example, to get the year some music was added to your library, use ``%time{$added,%Y}``. -* ``%first{text}``: Returns the first item, separated by ``; ``. +* ``%first{text}``: Returns the first item, separated by ``;`` (a semicolon + followed by a space). You can use ``%first{text,count,skip}``, where ``count`` is the number of items (default 1) and ``skip`` is number to skip (default 0). 
You can also use ``%first{text,count,skip,sep,join}`` where ``sep`` is the separator, like ``;`` or ``/`` and join is the text to concatenate the items. - For example, * ``%ifdef{field}``, ``%ifdef{field,truetext}`` or ``%ifdef{field,truetext,falsetext}``: If ``field`` exists, then return ``truetext`` or ``field`` (default). Otherwise, returns ``falsetext``.