mirror of
https://github.com/beetbox/beets.git
synced 2026-02-17 12:56:05 +01:00
User patterns implemented
This commit is contained in:
parent
9ca9e86cf2
commit
e147a21894
3 changed files with 128 additions and 167 deletions
|
|
@ -25,7 +25,7 @@ from typing import TypedDict
|
|||
from typing_extensions import NotRequired
|
||||
|
||||
from beets import config
|
||||
from beets.importer import ImportSession, ImportTask, SingletonImportTask
|
||||
from beets.importer import ImportSession, ImportTask
|
||||
from beets.library import Item
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets.util import displayable_path
|
||||
|
|
@ -149,43 +149,19 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
# TODO: Add ignore parent folder
|
||||
}
|
||||
)
|
||||
self.fields = set(self.config["fields"].as_str_seq())
|
||||
self.register_listener("import_task_start", self.filename_task)
|
||||
|
||||
@cached_property
|
||||
def fields(self) -> set[str]:
|
||||
return set(self.config["fields"].as_str_seq())
|
||||
|
||||
@cached_property
|
||||
def file_patterns(self) -> list[re.Pattern[str]]:
|
||||
return self._to_regex(self.config["patterns"]["file"].as_str_seq())
|
||||
return self._user_pattern_to_regex(
|
||||
self.config["patterns"]["file"].as_str_seq())
|
||||
|
||||
@cached_property
|
||||
def folder_patterns(self) -> list[re.Pattern[str]]:
|
||||
return self._to_regex(self.config["patterns"]["folder"].as_str_seq())
|
||||
|
||||
def _to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]:
|
||||
"""Compile user patterns into a list of usable regex
|
||||
patterns. Catches errors are continues without bad regex patterns.
|
||||
"""
|
||||
compiled: list[re.Pattern[str]] = []
|
||||
for p in patterns:
|
||||
try:
|
||||
# check that the pattern has actual content
|
||||
if len(p) < 1:
|
||||
raise Exception("pattern is empty")
|
||||
if not RE_NAMED_SUBGROUP.search(p):
|
||||
raise Exception("no named subgroups")
|
||||
regexp = re.compile(p, re.IGNORECASE | re.VERBOSE)
|
||||
compiled.append(regexp)
|
||||
except Exception as e:
|
||||
self._log.info(f"Invalid user pattern {self._escape(p)!r}: {e}")
|
||||
return compiled
|
||||
|
||||
@staticmethod
|
||||
def _escape(text: str) -> str:
|
||||
# escape brackets for fstring logs
|
||||
# TODO: Create an issue for brackets in logger
|
||||
return re.sub("}", "}}", re.sub("{", "{{", text))
|
||||
return self._user_pattern_to_regex(
|
||||
self.config["patterns"]["folder"].as_str_seq()
|
||||
)
|
||||
|
||||
def filename_task(self, task: ImportTask, session: ImportSession) -> None:
|
||||
""" Examines all files in the given import task for any missing
|
||||
|
|
@ -210,6 +186,21 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
# Apply the information
|
||||
self._apply_matches(album_matches, track_matches)
|
||||
|
||||
def _user_pattern_to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]:
|
||||
"""Compile user patterns into a list of usable regex
|
||||
patterns. Catches errors are continues without bad regex patterns.
|
||||
"""
|
||||
return [
|
||||
re.compile(regexp) for p in patterns if (
|
||||
regexp := self._parse_user_pattern_strings(p))
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _escape(text: str) -> str:
|
||||
# escape brackets for fstring logs
|
||||
# TODO: Create an issue for brackets in logger
|
||||
return re.sub("}", "}}", re.sub("{", "{{", text))
|
||||
|
||||
@staticmethod
|
||||
def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]:
|
||||
parent_folder: str = ""
|
||||
|
|
@ -222,14 +213,23 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
parent_folder = path.parent.stem
|
||||
return parent_folder, filenames
|
||||
|
||||
def _build_track_matches(self,
|
||||
item_filenames: dict[Item, str]) -> dict[Item, TrackMatch]:
|
||||
track_matches: dict[Item, TrackMatch] = {}
|
||||
for item, filename in item_filenames.items():
|
||||
m = self._parse_track_info(filename)
|
||||
track_matches[item] = m
|
||||
return track_matches
|
||||
def _check_user_matches(self, text: str,
|
||||
patterns: list[re.Pattern[str]]) -> dict[str, str]:
|
||||
for p in patterns:
|
||||
if (usermatch := p.fullmatch(text)):
|
||||
return usermatch.groupdict()
|
||||
return None
|
||||
|
||||
def _build_track_matches(self,
                         item_filenames: dict[Item, str]) -> dict[Item, dict[str, str]]:
    """Map every item to the fields extracted from its filename.

    For each filename the user-supplied file patterns are consulted first;
    when they yield nothing (or an empty result), the built-in parser
    ``_parse_track_info`` supplies the match instead.
    """
    return {
        item: (
            self._check_user_matches(filename, self.file_patterns)
            or self._parse_track_info(filename)
        )
        for item, filename in item_filenames.items()
    }
|
||||
|
||||
@staticmethod
|
||||
def _parse_track_info(text: str) -> TrackMatch:
|
||||
|
|
@ -268,7 +268,10 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
|
||||
return trackmatch
|
||||
|
||||
def _parse_album_info(self, text: str) -> AlbumMatch:
|
||||
def _parse_album_info(self, text: str) -> dict[str, str]:
|
||||
# Check if a user pattern matches
|
||||
if (m := self._check_user_matches(text, self.folder_patterns)):
|
||||
return m
|
||||
matches: AlbumMatch = {
|
||||
"albumartist": None,
|
||||
"album": None,
|
||||
|
|
@ -411,6 +414,18 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
return match.group("catalognum"), match.span()
|
||||
return None, (0, 0)
|
||||
|
||||
def _parse_user_pattern_strings(self, text: str) -> str | None:
|
||||
# escape any special characters
|
||||
fields: list[str] = [s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text)]
|
||||
if not fields:
|
||||
# if there are no usable fields
|
||||
return None
|
||||
pattern = re.escape(text)
|
||||
for f in fields:
|
||||
pattern = re.sub(rf"\\\${f}", f"(?P<{f}>.+)", pattern)
|
||||
self.fields.add(f)
|
||||
return rf"{pattern}"
|
||||
|
||||
@staticmethod
|
||||
def _mutate_string(text: str, span: tuple[int, int]) -> str:
|
||||
"""Replace a matched field with a seperator"""
|
||||
|
|
@ -439,11 +454,6 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
if len(track_matches) < 2:
|
||||
return
|
||||
|
||||
# If the album artist is not various artists
|
||||
# check that all artists match
|
||||
# if they do not, try seeing if all the titles match
|
||||
# if all the titles match, swap title and artist fields
|
||||
# If we know that it's a VA album, then we can't assert much from the artists
|
||||
tracks: list[TrackMatch] = list(track_matches.values())
|
||||
album_artist = album_match["albumartist"]
|
||||
one_artist = self._equal_fields(tracks, "artist")
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ The ``fromfilename`` plugin helps to tag albums that are missing tags altogether
|
|||
but where the filenames contain useful information like the artist and title.
|
||||
|
||||
When you attempt to import a track that's missing a title, this plugin will look
|
||||
at the track's filename and guess its disc, track number, title, and artist.
|
||||
These will be used to search for metadata and match track ordering.
|
||||
at the track's filename and parent folder, and guess a number of fields.
|
||||
|
||||
The extracted information will be used to search for metadata and match track ordering.
|
||||
|
||||
To use the ``fromfilename`` plugin, enable it in your configuration (see
|
||||
:ref:`using-plugins`).
|
||||
|
|
@ -45,88 +46,24 @@ Default
|
|||
|
||||
.. conf:: patterns
|
||||
|
||||
Extra regular expression patterns specified by the user. See the section on patterns for more information.
|
||||
Users can specify patterns to improve the efficacy of the plugin. Patterns can
|
||||
be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked
|
||||
against the filename. ``folder`` patterns are checked against the parent folder
|
||||
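of the file. A pattern is literal text containing ``$field`` placeholders (for example ``$artist``); each placeholder captures the text for that beets field, and every other character is matched literally.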
|
||||
|
||||
Patterns
|
||||
--------
|
||||
If ``fromfilename`` can't match the entire string to the given pattern, it will
|
||||
fall back to the default pattern.
|
||||
|
||||
Examples of paths that the plugin can parse successfully, and the fields
|
||||
retrieved.
|
||||
The following custom patterns will match this path and retrieve the specified
|
||||
fields.
|
||||
|
||||
.. code-block:: yaml
|
||||
``/music/James Lawson - 841689 (2004)/Coming Up - James Lawson & Andy Farley.mp3``
|
||||
|
||||
"/Artist - Album (2025)/03.wav"
|
||||
album: Album
|
||||
albumartist: Artist
|
||||
title: "03"
|
||||
track: 3
|
||||
year: 2025
|
||||
.. code-block:: yaml
|
||||
|
||||
"/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac"
|
||||
artist: Artist
|
||||
album: Album
|
||||
albumartist: Various Artists
|
||||
catalognum: CAT123
|
||||
disc: 2
|
||||
media: Digital Media
|
||||
title: Song One
|
||||
track: 10
|
||||
patterns:
|
||||
folder:
|
||||
- "$albumartist - $discogs_albumid ($year)"
|
||||
file:
|
||||
- "$title - $artist"
|
||||
|
||||
"/Album Artist - Album Title (1997) {CATALOGNUM123}/1-23.flac"
|
||||
albumartist: Album Artist
|
||||
album: Album Title
|
||||
year: 1997
|
||||
disc: 1
|
||||
track: 23
|
||||
|
||||
"/04. Song.mp3"
|
||||
title: Song
|
||||
track: 4
|
||||
|
||||
"/5_-_My_Artist_-_My_Title.m4a"
|
||||
artist: My_Artist
|
||||
title: My_Title
|
||||
track: 5
|
||||
|
||||
"/8 Song by Artist.wav"
|
||||
artist: Artist
|
||||
title: Song
|
||||
track: 8
|
||||
|
||||
User Patterns
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
Users can specify patterns to improve the efficacy of the plugin. Patterns can
|
||||
be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked
|
||||
against the filename. ``folder`` patterns are checked against the parent folder
|
||||
of the file.
|
||||
|
||||
To contribute information, the patterns must use named capture groups
|
||||
``(?P<name>...)``. The name of the capture group represents the beets field the
|
||||
captured text will be applied to. User patterns are compiled with the verbose
|
||||
and ignore case flags. Spaces in a match should be noted with `\s`.
|
||||
|
||||
If ``fromfilename`` can't match the entire string to the given pattern, it will
|
||||
fall back to the default pattern.
|
||||
|
||||
The following custom patterns will match this path and retrieve the specified
|
||||
fields.
|
||||
|
||||
``/music/James Lawson - 841689/Coming Up - James Lawson & Andy Farley.mp3``
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
patterns:
|
||||
folder:
|
||||
# multiline blocks are allowed for readability
|
||||
- |
|
||||
(?P<albumartist>\w+)
|
||||
\s-\s
|
||||
(?P<discogs_albumid>\d+)'
|
||||
file:
|
||||
- '(?P<artist>\w+)\s-\s(?P<track>\d+)'
|
||||
|
||||
For more information on writing regular expressions, check out the `python
|
||||
documentation`_.
|
||||
|
||||
.. _python documentation: https://docs.python.org/3/library/re.html
|
||||
|
|
|
|||
|
|
@ -13,8 +13,6 @@
|
|||
|
||||
"""Tests for the fromfilename plugin."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
|
||||
from beets.library import Item
|
||||
|
|
@ -255,44 +253,19 @@ def test_parse_album_info(text, matchgroup):
|
|||
m = f._parse_album_info(text)
|
||||
assert matchgroup == m
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"patterns,expected",
|
||||
[
|
||||
@pytest.mark.parametrize("string,pattern",[
|
||||
(
|
||||
[
|
||||
r"""
|
||||
(?P<disc>\d+(?=[\.\-_]\d))?
|
||||
# a disc must be followed by punctuation and a digit
|
||||
[\.\-]{,1}
|
||||
# disc punctuation
|
||||
(?P<track>\d+)?
|
||||
# match the track number
|
||||
[\.\-_\s]*
|
||||
# artist separators
|
||||
(?P<artist>.+?(?=[\s*_]?[\.\-by].+))?
|
||||
# artist match depends on title existing
|
||||
[\.\-_\s]*
|
||||
(?P<by>by)?
|
||||
# if 'by' is found, artist and title will need to be swapped
|
||||
[\.\-_\s]*
|
||||
# title separators
|
||||
(?P<title>.+)?
|
||||
# match the track title
|
||||
""",
|
||||
r"",
|
||||
r"(?:<invalid)",
|
||||
r"(.*)",
|
||||
r"(?P<disc>asda}]",
|
||||
],
|
||||
1,
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_to_regex(patterns, expected):
|
||||
"$albumartist - $album ($year) {$comments}",
|
||||
r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}"
|
||||
),
|
||||
(
|
||||
"$",
|
||||
None
|
||||
),
|
||||
])
|
||||
def test_parse_user_pattern_strings(string,pattern):
|
||||
f = FromFilenamePlugin()
|
||||
p = f._to_regex(patterns)
|
||||
assert len(p) == expected
|
||||
assert f._parse_user_pattern_strings(string) == pattern
|
||||
|
||||
|
||||
class TestFromFilename(PluginMixin):
|
||||
|
|
@ -647,7 +620,6 @@ class TestFromFilename(PluginMixin):
|
|||
expected.path = path
|
||||
with self.configure_plugin({"fields": fields}):
|
||||
f = FromFilenamePlugin()
|
||||
f.config
|
||||
f.filename_task(task, Session())
|
||||
res = task.items[0]
|
||||
assert res.path == expected.path
|
||||
|
|
@ -658,6 +630,48 @@ class TestFromFilename(PluginMixin):
|
|||
assert res.year == expected.year
|
||||
assert res.title == expected.title
|
||||
|
||||
@pytest.mark.parametrize("patterns,expected_item", [])
|
||||
def test_user_regex(self, patterns, expected_item):
|
||||
return
|
||||
@pytest.mark.parametrize("patterns,expected", [
|
||||
(
|
||||
{
|
||||
"folder": ["($comments) - {$albumartist} - {$album}"],
|
||||
"file": ["$artist - $track - $title"]
|
||||
},
|
||||
mock_item(
|
||||
path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac",
|
||||
comments="Comment",
|
||||
albumartist="Album Artist",
|
||||
album="Album",
|
||||
artist="Artist",
|
||||
track=2,
|
||||
title="Title",
|
||||
)
|
||||
),
|
||||
(
|
||||
{
|
||||
"folder": ["[$comments] - {$albumartist} - {$album}"],
|
||||
"file": ["$artist - $track - $title"]
|
||||
},
|
||||
mock_item(
|
||||
path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac",
|
||||
artist="Artist",
|
||||
track=2,
|
||||
title="Title",
|
||||
catalognum="Comment"
|
||||
)
|
||||
)
|
||||
])
|
||||
def test_user_patterns(self, patterns, expected):
|
||||
task = mock_task([mock_item(path=expected.path)])
|
||||
with self.configure_plugin({ "patterns": patterns }):
|
||||
f = FromFilenamePlugin()
|
||||
f.filename_task(task, Session())
|
||||
res = task.items[0]
|
||||
assert res.comments == expected.comments
|
||||
assert res.path == expected.path
|
||||
assert res.artist == expected.artist
|
||||
assert res.albumartist == expected.albumartist
|
||||
assert res.disc == expected.disc
|
||||
assert res.catalognum == expected.catalognum
|
||||
assert res.year == expected.year
|
||||
assert res.title == expected.title
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue