User patterns implemented

This commit is contained in:
Henry Oberholtzer 2026-01-06 17:00:55 -08:00
parent 9ca9e86cf2
commit e147a21894
3 changed files with 128 additions and 167 deletions

View file

@ -25,7 +25,7 @@ from typing import TypedDict
from typing_extensions import NotRequired
from beets import config
from beets.importer import ImportSession, ImportTask, SingletonImportTask
from beets.importer import ImportSession, ImportTask
from beets.library import Item
from beets.plugins import BeetsPlugin
from beets.util import displayable_path
@ -149,43 +149,19 @@ class FromFilenamePlugin(BeetsPlugin):
# TODO: Add ignore parent folder
}
)
self.fields = set(self.config["fields"].as_str_seq())
self.register_listener("import_task_start", self.filename_task)
@cached_property
def fields(self) -> set[str]:
    """Return the plugin's ``fields`` config option as a set of names.

    The value is computed once per plugin instance and then cached.
    """
    configured = self.config["fields"].as_str_seq()
    return set(configured)
@cached_property
def file_patterns(self) -> list[re.Pattern[str]]:
    """Return compiled regexes for the ``patterns.file`` config option.

    The configured strings are handed to ``_user_pattern_to_regex``; the
    result is computed once per plugin instance and then cached.
    """
    # A single return: the earlier ``_to_regex`` call that preceded this one
    # made the ``_user_pattern_to_regex`` call unreachable.
    return self._user_pattern_to_regex(
        self.config["patterns"]["file"].as_str_seq()
    )
@cached_property
def folder_patterns(self) -> list[re.Pattern[str]]:
    """Return compiled regexes for the ``patterns.folder`` config option.

    The configured strings are handed to ``_user_pattern_to_regex``, the same
    converter used for the ``file`` patterns; the result is computed once per
    plugin instance and then cached.
    """
    return self._user_pattern_to_regex(
        self.config["patterns"]["folder"].as_str_seq()
    )
def _to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]:
    """Compile user-supplied regex strings, leaving out unusable ones.

    A pattern is rejected when it is empty, when ``RE_NAMED_SUBGROUP`` finds
    nothing in it, or when compiling it raises. Every rejection is logged at
    info level and the remaining patterns are still processed.
    """
    usable: list[re.Pattern[str]] = []
    flags = re.IGNORECASE | re.VERBOSE
    for p in patterns:
        try:
            # An empty pattern has no content to match against
            if not p:
                raise Exception("pattern is empty")
            if RE_NAMED_SUBGROUP.search(p) is None:
                raise Exception("no named subgroups")
            usable.append(re.compile(p, flags))
        except Exception as e:
            self._log.info(f"Invalid user pattern {self._escape(p)!r}: {e}")
    return usable
@staticmethod
def _escape(text: str) -> str:
# escape brackets for fstring logs
# TODO: Create an issue for brackets in logger
return re.sub("}", "}}", re.sub("{", "{{", text))
return self._user_pattern_to_regex(
self.config["patterns"]["folder"].as_str_seq()
)
def filename_task(self, task: ImportTask, session: ImportSession) -> None:
""" Examines all files in the given import task for any missing
@ -210,6 +186,21 @@ class FromFilenamePlugin(BeetsPlugin):
# Apply the information
self._apply_matches(album_matches, track_matches)
def _user_pattern_to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]:
"""Compile user patterns into a list of usable regex
patterns. Catches errors are continues without bad regex patterns.
"""
return [
re.compile(regexp) for p in patterns if (
regexp := self._parse_user_pattern_strings(p))
]
@staticmethod
def _escape(text: str) -> str:
# escape brackets for fstring logs
# TODO: Create an issue for brackets in logger
return re.sub("}", "}}", re.sub("{", "{{", text))
@staticmethod
def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]:
parent_folder: str = ""
@ -222,14 +213,23 @@ class FromFilenamePlugin(BeetsPlugin):
parent_folder = path.parent.stem
return parent_folder, filenames
def _build_track_matches(self,
                         item_filenames: dict[Item, str]) -> dict[Item, TrackMatch]:
    """Map every item to the result of ``_parse_track_info`` on its filename."""
    return {
        item: self._parse_track_info(name)
        for item, name in item_filenames.items()
    }
def _check_user_matches(self, text: str,
patterns: list[re.Pattern[str]]) -> dict[str, str]:
for p in patterns:
if (usermatch := p.fullmatch(text)):
return usermatch.groupdict()
return None
def _build_track_matches(self,
                         item_filenames: dict[Item, str]) -> dict[Item, dict[str, str]]:
    """Build the per-item match dictionary from each item's filename.

    The user ``file_patterns`` are consulted first through
    ``_check_user_matches``; when they yield nothing for a filename, the
    result of ``_parse_track_info`` is used instead.
    """
    return {
        item: (
            self._check_user_matches(name, self.file_patterns)
            or self._parse_track_info(name)
        )
        for item, name in item_filenames.items()
    }
@staticmethod
def _parse_track_info(text: str) -> TrackMatch:
@ -268,7 +268,10 @@ class FromFilenamePlugin(BeetsPlugin):
return trackmatch
def _parse_album_info(self, text: str) -> AlbumMatch:
def _parse_album_info(self, text: str) -> dict[str, str]:
# Check if a user pattern matches
if (m := self._check_user_matches(text, self.folder_patterns)):
return m
matches: AlbumMatch = {
"albumartist": None,
"album": None,
@ -411,6 +414,18 @@ class FromFilenamePlugin(BeetsPlugin):
return match.group("catalognum"), match.span()
return None, (0, 0)
def _parse_user_pattern_strings(self, text: str) -> str | None:
# escape any special characters
fields: list[str] = [s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text)]
if not fields:
# if there are no usable fields
return None
pattern = re.escape(text)
for f in fields:
pattern = re.sub(rf"\\\${f}", f"(?P<{f}>.+)", pattern)
self.fields.add(f)
return rf"{pattern}"
@staticmethod
def _mutate_string(text: str, span: tuple[int, int]) -> str:
"""Replace a matched field with a seperator"""
@ -439,11 +454,6 @@ class FromFilenamePlugin(BeetsPlugin):
if len(track_matches) < 2:
return
# If the album artist is not various artists
# check that all artists match
# if they do not, try seeing if all the titles match
# if all the titles match, swap title and artist fields
# If we know that it's a VA album, then we can't assert much from the artists
tracks: list[TrackMatch] = list(track_matches.values())
album_artist = album_match["albumartist"]
one_artist = self._equal_fields(tracks, "artist")

View file

@ -5,8 +5,9 @@ The ``fromfilename`` plugin helps to tag albums that are missing tags altogether
but where the filenames contain useful information like the artist and title.
When you attempt to import a track that's missing a title, this plugin will look
at the track's filename and guess its disc, track number, title, and artist.
These will be used to search for metadata and match track ordering.
at the track's filename and parent folder, and guess a number of fields.
The extracted information will be used to search for metadata and match track ordering.
To use the ``fromfilename`` plugin, enable it in your configuration (see
:ref:`using-plugins`).
@ -45,88 +46,24 @@ Default
.. conf:: patterns
Extra regular expression patterns specified by the user. See the section on patterns for more information.
Users can specify patterns to improve the efficacy of the plugin. Patterns can
be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked
against the filename. ``folder`` patterns are checked against the parent folder
of the file.
Patterns
--------
If ``fromfilename`` can't match the entire string to the given pattern, it will
fall back to the default pattern.
Examples of paths that the plugin can parse successfully, and the fields
retrieved.
The following custom patterns will match this path and retrieve the specified
fields.
.. code-block:: yaml
``/music/James Lawson - 841689 (2004)/Coming Up - James Lawson & Andy Farley.mp3``
"/Artist - Album (2025)/03.wav"
album: Album
albumartist: Artist
title: "03"
track: 3
year: 2025
.. code-block:: yaml
"/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac"
artist: Artist
album: Album
albumartist: Various Artists
catalognum: CAT123
disc: 2
media: Digital Media
title: Song One
track: 10
patterns:
folder:
- "$albumartist - $discogs_albumid ($year)"
file:
- "$title - $artist"
"/Album Artist - Album Title (1997) {CATALOGNUM123}/1-23.flac"
albumartist: Album Artist
album: Album Title
year: 1997
disc: 1
track: 23
"/04. Song.mp3"
title: Song
track: 4
"/5_-_My_Artist_-_My_Title.m4a"
artist: My_Artist
title: My_Title
track: 5
"/8 Song by Artist.wav"
artist: Artist
title: Song
track: 8
User Patterns
~~~~~~~~~~~~~
Users can specify patterns to improve the efficacy of the plugin. Patterns can
be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked
against the filename. ``folder`` patterns are checked against the parent folder
of the file.
To contribute information, the patterns must use named capture groups
``(?P<name>...)``. The name of the capture group represents the beets field the
captured text will be applied to. User patterns are compiled with the verbose
and ignore case flags. Spaces in a match should be noted with ``\s``.
If ``fromfilename`` can't match the entire string to the given pattern, it will
fall back to the default pattern.
The following custom patterns will match this path and retrieve the specified
fields.
``/music/James Lawson - 841689/Coming Up - James Lawson & Andy Farley.mp3``
.. code-block:: yaml
patterns:
folder:
# multiline blocks are allowed for readability
- |
(?P<albumartist>\w+)
\s-\s
(?P<discogs_albumid>\d+)
file:
- '(?P<artist>\w+)\s-\s(?P<track>\d+)'
For more information on writing regular expressions, check out the `python
documentation`_.
.. _python documentation: https://docs.python.org/3/library/re.html

View file

@ -13,8 +13,6 @@
"""Tests for the fromfilename plugin."""
from dataclasses import dataclass
import pytest
from beets.library import Item
@ -255,44 +253,19 @@ def test_parse_album_info(text, matchgroup):
m = f._parse_album_info(text)
assert matchgroup == m
@pytest.mark.parametrize(
"patterns,expected",
[
@pytest.mark.parametrize("string,pattern",[
(
[
r"""
(?P<disc>\d+(?=[\.\-_]\d))?
# a disc must be followed by punctuation and a digit
[\.\-]{,1}
# disc punctuation
(?P<track>\d+)?
# match the track number
[\.\-_\s]*
# artist separators
(?P<artist>.+?(?=[\s*_]?[\.\-by].+))?
# artist match depends on title existing
[\.\-_\s]*
(?P<by>by)?
# if 'by' is found, artist and title will need to be swapped
[\.\-_\s]*
# title separators
(?P<title>.+)?
# match the track title
""",
r"",
r"(?:<invalid)",
r"(.*)",
r"(?P<disc>asda}]",
],
1,
)
],
)
def test_to_regex(patterns, expected):
"$albumartist - $album ($year) {$comments}",
r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}"
),
(
"$",
None
),
])
def test_parse_user_pattern_strings(string,pattern):
f = FromFilenamePlugin()
p = f._to_regex(patterns)
assert len(p) == expected
assert f._parse_user_pattern_strings(string) == pattern
class TestFromFilename(PluginMixin):
@ -647,7 +620,6 @@ class TestFromFilename(PluginMixin):
expected.path = path
with self.configure_plugin({"fields": fields}):
f = FromFilenamePlugin()
f.config
f.filename_task(task, Session())
res = task.items[0]
assert res.path == expected.path
@ -658,6 +630,48 @@ class TestFromFilename(PluginMixin):
assert res.year == expected.year
assert res.title == expected.title
@pytest.mark.parametrize("patterns,expected_item", [])
def test_user_regex(self, patterns, expected_item):
    """Placeholder test: the parametrized case list is currently empty."""
    return None
@pytest.mark.parametrize("patterns,expected", [
    (
        {
            "folder": ["($comments) - {$albumartist} - {$album}"],
            "file": ["$artist - $track - $title"],
        },
        mock_item(
            path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac",
            comments="Comment",
            albumartist="Album Artist",
            album="Album",
            artist="Artist",
            track=2,
            title="Title",
        ),
    ),
    (
        # The folder pattern uses [] while the path uses ():
        # presumably this exercises the fall-back to the built-in parsing.
        {
            "folder": ["[$comments] - {$albumartist} - {$album}"],
            "file": ["$artist - $track - $title"],
        },
        mock_item(
            path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac",
            artist="Artist",
            track=2,
            title="Title",
            catalognum="Comment",
        ),
    ),
])
def test_user_patterns(self, patterns, expected):
    """Run the plugin with user ``patterns`` and compare the tagged item.

    A single-item task is built from ``expected.path``, passed through
    ``filename_task``, and the resulting item is compared with ``expected``
    field by field.
    """
    checked_fields = (
        "comments",
        "path",
        "artist",
        "albumartist",
        "disc",
        "catalognum",
        "year",
        "title",
    )
    task = mock_task([mock_item(path=expected.path)])
    with self.configure_plugin({"patterns": patterns}):
        plugin = FromFilenamePlugin()
        plugin.filename_task(task, Session())
        imported = task.items[0]
        for name in checked_fields:
            assert getattr(imported, name) == getattr(expected, name), name