Perform a regex substitution in the substitute plugin (#5357)

This utilises regex substitution in the substitute plugin. The previous
approach only used regex to match the pattern, then replaced it with a
static string. This change allows more complex substitutions, where the
output depends on the input.

### Example use case
Say we want to keep only the first artist of a multi-artist credit, as
in the following list:
```
Neil Young & Crazy Horse -> Neil Young
Michael Hurley, The Holy Modal Rounders, Jeffrey Frederick & The Clamtones -> Michael Hurley
James Yorkston and the Athletes -> James Yorkston
```
This would previously have required three separate rules, one for each
resulting artist. By using a regex substitution, we can get the desired
behaviour in a single rule:
```yaml
substitute:
  ^(.*?)(,| &| and).*: \1
```
(Capture the text until the first `,`, ` &` or ` and`, then use that
capture group as the output.)

### Notes
I've kept the previous behaviour of only applying the first matching
rule, but I'm not 100% sure it's the ideal approach.
I can imagine both cases where you want to apply several rules in
sequence and cases where you want to stop after the first match.
This commit is contained in:
Šarūnas Nejus 2024-11-22 05:02:50 +00:00 committed by GitHub
commit 65e935bee5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 125 additions and 18 deletions

View file

@ -503,12 +503,8 @@ class PluginMixin:
Album._queries = getattr(Album, "_original_queries", {})
@contextmanager
def configure_plugin(self, config: list[Any] | dict[str, Any]):
if isinstance(config, list):
beets.config[self.plugin] = config
else:
for key, value in config.items():
beets.config[self.plugin][key] = value
def configure_plugin(self, config: Any):
beets.config[self.plugin].set(config)
self.load_plugins(self.plugin)
yield

View file

@ -34,8 +34,7 @@ class Substitute(BeetsPlugin):
"""Do the actual replacing."""
if text:
for pattern, replacement in self.substitute_rules:
if pattern.match(text.lower()):
return replacement
text = pattern.sub(replacement, text)
return text
else:
return ""
@ -47,10 +46,8 @@ class Substitute(BeetsPlugin):
substitute rules.
"""
super().__init__()
self.substitute_rules = []
self.template_funcs["substitute"] = self.tmpl_substitute
for key, view in self.config.items():
value = view.as_str()
pattern = re.compile(key.lower())
self.substitute_rules.append((pattern, value))
self.substitute_rules = [
(re.compile(key, flags=re.IGNORECASE), value)
for key, value in self.config.flatten().items()
]

View file

@ -31,6 +31,9 @@ New features:
* Beets now uses ``platformdirs`` to determine the default music directory.
This location varies between systems -- for example, users can configure it
on Unix systems via ``user-dirs.dirs(5)``.
* :doc:`/plugins/substitute`: Allow the replacement string to use capture groups
from the match. It is thus possible to create more general rules, applying to
many different artists at once.
Bug fixes:

View file

@ -11,13 +11,34 @@ the ``rewrite`` plugin modifies the metadata, this plugin does not.
Enable the ``substitute`` plugin (see :ref:`using-plugins`), then make a ``substitute:`` section in your config file to contain your rules.
Each rule consists of a case-insensitive regular expression pattern, and a
replacement value. For example, you might use:
replacement string. For example, you might use:
.. code-block:: yaml
substitute:
.*jimi hendrix.*: Jimi Hendrix
.*jimi hendrix.*: Jimi Hendrix
The replacement can be an expression utilising the matched regex, allowing us
to create more general rules. Say, for example, we want to sort all albums by
multiple artists into the directory of the first artist. We can thus capture
everything before the first ``,``, `` &`` or `` and``, and use this capture
group in the output, discarding the rest of the string.
.. code-block:: yaml
substitute:
^(.*?)(,| &| and).*: \1
This would handle all the below cases in a single rule:
Bob Dylan and The Band -> Bob Dylan
Neil Young & Crazy Horse -> Neil Young
James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston
To apply the substitution, you have to call the function ``%substitute{}`` in the paths section. For example:
.. code-block:: yaml
paths:
default: %substitute{$albumartist}/$year - $album%aunique{}/$track - $title
default: \%substitute{$albumartist}/$year - $album\%aunique{}/$track - $title

View file

@ -0,0 +1,90 @@
# This file is part of beets.
# Copyright 2024, Nicholas Boyd Isacsson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Test the substitute plugin regex functionality."""
from beets.test.helper import PluginTestCase
from beetsplug.substitute import Substitute
class SubstitutePluginTest(PluginTestCase):
    """Check the replacement rules of the ``substitute`` plugin.

    Every test supplies a mapping of regex pattern -> replacement string as
    the plugin configuration, then compares the output of
    ``Substitute.tmpl_substitute`` against the expected text.
    """

    plugin = "substitute"
    # NOTE(review): presumably stops the base class from loading the plugin
    # up front, so that each test can set its rules first -- confirm against
    # PluginTestCase.
    preload_plugin = False

    def run_substitute(self, config, cases):
        """Assert that each case is substituted as expected under ``config``.

        ``config`` is the plugin configuration (pattern -> replacement).
        ``cases`` is an iterable of ``(text, expected)`` pairs; a fresh
        ``Substitute`` instance is created for every pair.
        """
        with self.configure_plugin(config):
            # ``text`` rather than ``input`` to avoid shadowing the builtin.
            for text, expected in cases:
                assert Substitute().tmpl_substitute(text) == expected

    def test_simple_substitute(self):
        """Independent literal rules each rewrite their own input."""
        self.run_substitute(
            {
                "a": "x",
                "b": "y",
                "c": "z",
            },
            [("a", "x"), ("b", "y"), ("c", "z")],
        )

    def test_case_insensitivity(self):
        """A lowercase pattern also matches uppercase input."""
        self.run_substitute({"a": "x"}, [("A", "x")])

    def test_unmatched_input_preserved(self):
        """Text that matches no rule is returned unchanged."""
        self.run_substitute({"a": "x"}, [("c", "c")])

    def test_regex_to_static(self):
        """A pattern spanning the whole text can map to a fixed string."""
        self.run_substitute(
            {".*jimi hendrix.*": "Jimi Hendrix"},
            [("The Jimi Hendrix Experience", "Jimi Hendrix")],
        )

    def test_regex_capture_group(self):
        """The replacement may refer to a capture group of the match."""
        self.run_substitute(
            {"^(.*?)(,| &| and).*": r"\1"},
            [
                ("King Creosote & Jon Hopkins", "King Creosote"),
                (
                    "Michael Hurley, The Holy Modal Rounders, Jeffrey Frederick & "
                    + "The Clamtones",
                    "Michael Hurley",
                ),
                ("James Yorkston and the Athletes", "James Yorkston"),
            ],
        )

    def test_partial_substitution(self):
        """Only the matched parts of the text are replaced."""
        self.run_substitute({r"\.": ""}, [("U.N.P.O.C.", "UNPOC")])

    def test_rules_applied_in_definition_order(self):
        """Earlier rules take effect before later, overlapping ones."""
        self.run_substitute(
            {
                "a": "x",
                "[ab]": "y",
                "b": "z",
            },
            [
                # "a" is rewritten by the first rule; the result "x" matches
                # none of the remaining patterns.
                ("a", "x"),
                # "b" is caught by "[ab]" before the dedicated "b" rule.
                ("b", "y"),
            ],
        )

    def test_rules_applied_in_sequence(self):
        """Each rule operates on the output of the rules before it."""
        self.run_substitute(
            {"a": "b", "b": "c", "d": "a"},
            [
                # "a" -> "b" by the first rule, then "b" -> "c" by the second.
                ("a", "c"),
                ("b", "c"),
                # "d" -> "a" by the last rule; no later rule rewrites it.
                ("d", "a"),
            ],
        )