diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 2141b4ed5..34ea1155a 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -25,7 +25,7 @@ from typing import TypedDict from typing_extensions import NotRequired from beets import config -from beets.importer import ImportSession, ImportTask, SingletonImportTask +from beets.importer import ImportSession, ImportTask from beets.library import Item from beets.plugins import BeetsPlugin from beets.util import displayable_path @@ -149,43 +149,19 @@ class FromFilenamePlugin(BeetsPlugin): # TODO: Add ignore parent folder } ) + self.fields = set(self.config["fields"].as_str_seq()) self.register_listener("import_task_start", self.filename_task) - @cached_property - def fields(self) -> set[str]: - return set(self.config["fields"].as_str_seq()) - @cached_property def file_patterns(self) -> list[re.Pattern[str]]: - return self._to_regex(self.config["patterns"]["file"].as_str_seq()) + return self._user_pattern_to_regex( + self.config["patterns"]["file"].as_str_seq()) @cached_property def folder_patterns(self) -> list[re.Pattern[str]]: - return self._to_regex(self.config["patterns"]["folder"].as_str_seq()) - - def _to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: - """Compile user patterns into a list of usable regex - patterns. Catches errors are continues without bad regex patterns. 
- """ - compiled: list[re.Pattern[str]] = [] - for p in patterns: - try: - # check that the pattern has actual content - if len(p) < 1: - raise Exception("pattern is empty") - if not RE_NAMED_SUBGROUP.search(p): - raise Exception("no named subgroups") - regexp = re.compile(p, re.IGNORECASE | re.VERBOSE) - compiled.append(regexp) - except Exception as e: - self._log.info(f"Invalid user pattern {self._escape(p)!r}: {e}") - return compiled - - @staticmethod - def _escape(text: str) -> str: - # escape brackets for fstring logs - # TODO: Create an issue for brackets in logger - return re.sub("}", "}}", re.sub("{", "{{", text)) + return self._user_pattern_to_regex( + self.config["patterns"]["folder"].as_str_seq() + ) def filename_task(self, task: ImportTask, session: ImportSession) -> None: """ Examines all files in the given import task for any missing @@ -210,6 +186,21 @@ class FromFilenamePlugin(BeetsPlugin): # Apply the information self._apply_matches(album_matches, track_matches) + def _user_pattern_to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: + """Compile user patterns into a list of usable regex + patterns. Catches errors are continues without bad regex patterns. 
+ """ + return [ + re.compile(regexp) for p in patterns if ( + regexp := self._parse_user_pattern_strings(p)) + ] + + @staticmethod + def _escape(text: str) -> str: + # escape brackets for fstring logs + # TODO: Create an issue for brackets in logger + return re.sub("}", "}}", re.sub("{", "{{", text)) + @staticmethod def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: parent_folder: str = "" @@ -222,14 +213,23 @@ class FromFilenamePlugin(BeetsPlugin): parent_folder = path.parent.stem return parent_folder, filenames - def _build_track_matches(self, - item_filenames: dict[Item, str]) -> dict[Item, TrackMatch]: - track_matches: dict[Item, TrackMatch] = {} - for item, filename in item_filenames.items(): - m = self._parse_track_info(filename) - track_matches[item] = m - return track_matches + def _check_user_matches(self, text: str, + patterns: list[re.Pattern[str]]) -> dict[str, str]: + for p in patterns: + if (usermatch := p.fullmatch(text)): + return usermatch.groupdict() + return None + def _build_track_matches(self, + item_filenames: dict[Item, str]) -> dict[Item, dict[str, str]]: + track_matches: dict[Item, dict[str, str]] = {} + for item, filename in item_filenames.items(): + if (m := self._check_user_matches(filename, self.file_patterns)): + track_matches[item] = m + else: + match = self._parse_track_info(filename) + track_matches[item] = match + return track_matches @staticmethod def _parse_track_info(text: str) -> TrackMatch: @@ -268,7 +268,10 @@ class FromFilenamePlugin(BeetsPlugin): return trackmatch - def _parse_album_info(self, text: str) -> AlbumMatch: + def _parse_album_info(self, text: str) -> dict[str, str]: + # Check if a user pattern matches + if (m := self._check_user_matches(text, self.folder_patterns)): + return m matches: AlbumMatch = { "albumartist": None, "album": None, @@ -411,6 +414,18 @@ class FromFilenamePlugin(BeetsPlugin): return match.group("catalognum"), match.span() return None, (0, 0) + def 
_parse_user_pattern_strings(self, text: str) -> str | None: + # escape any special characters + fields: list[str] = [s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text)] + if not fields: + # if there are no usable fields + return None + pattern = re.escape(text) + for f in fields: + pattern = re.sub(rf"\\\${f}", f"(?P<{f}>.+)", pattern) + self.fields.add(f) + return rf"{pattern}" + @staticmethod def _mutate_string(text: str, span: tuple[int, int]) -> str: """Replace a matched field with a seperator""" @@ -439,11 +454,6 @@ class FromFilenamePlugin(BeetsPlugin): if len(track_matches) < 2: return - # If the album artist is not various artists - # check that all artists match - # if they do not, try seeing if all the titles match - # if all the titles match, swap title and artist fields - # If we know that it's a VA album, then we can't assert much from the artists tracks: list[TrackMatch] = list(track_matches.values()) album_artist = album_match["albumartist"] one_artist = self._equal_fields(tracks, "artist") diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index 7431c45a2..5308d5c5b 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -5,8 +5,9 @@ The ``fromfilename`` plugin helps to tag albums that are missing tags altogether but where the filenames contain useful information like the artist and title. When you attempt to import a track that's missing a title, this plugin will look -at the track's filename and guess its disc, track number, title, and artist. -These will be used to search for metadata and match track ordering. +at the track's filename and parent folder, and guess a number of fields. + +The extracted information will be used to search for metadata and match track ordering. To use the ``fromfilename`` plugin, enable it in your configuration (see :ref:`using-plugins`). @@ -45,88 +46,24 @@ Default .. conf:: patterns - Extra regular expression patterns specified by the user. 
See the section on patterns for more information. + Users can specify patterns to improve the efficacy of the plugin. Patterns can + be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked + against the filename. ``folder`` patterns are checked against the parent folder + of the file. -Patterns --------- + If ``fromfilename`` can't match the entire string to the given pattern, it will + fall back to the default pattern. -Examples of paths that the plugin can parse successfully, and the fields -retrieved. + The following custom patterns will match this path and retrieve the specified + fields. -.. code-block:: yaml + ``/music/James Lawson - 841689 (2004)/Coming Up - James Lawson & Andy Farley.mp3`` - "/Artist - Album (2025)/03.wav" - album: Album - albumartist: Artist - title: "03" - track: 3 - year: 2025 + .. code-block:: yaml - "/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac" - artist: Artist - album: Album - albumartist: Various Artists - catalognum: CAT123 - disc: 2 - media: Digital Media - title: Song One - track: 10 + patterns: + folder: + - "$albumartist - $discogs_albumid ($year)" + file: + - "$title - $artist" - "/Album Artist - Album Title (1997) {CATALOGNUM123}/1-23.flac" - albumartist: Album Artist - album: Album Title - year: 1997 - disc: 1 - track: 23 - - "/04. Song.mp3" - title: Song - track: 4 - - "/5_-_My_Artist_-_My_Title.m4a" - artist: My_Artist - title: My_Title - track: 5 - - "/8 Song by Artist.wav" - artist: Artist - title: Song - track: 8 - -User Patterns -~~~~~~~~~~~~~ - -Users can specify patterns to improve the efficacy of the plugin. Patterns can -be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked -against the filename. ``folder`` patterns are checked against the parent folder -of the file. - -To contribute information, the patterns must use named capture groups -``(?P...)``. 
The name of the capture group represents the beets field the -captured text will be applied to. User patterns are compiled with the verbose -and ignore case flags. Spaces in a match should be noted with `\s`. - -If ``fromfilename`` can't match the entire string to the given pattern, it will -fall back to the default pattern. - -The following custom patterns will match this path and retrieve the specified -fields. - -``/music/James Lawson - 841689/Coming Up - James Lawson & Andy Farley.mp3`` - -.. code-block:: yaml - - patterns: - folder: - # multiline blocks are allowed for readability - - | - (?P\w+) - \s-\s - (?P\d+)' - file: - - '(?P\w+)\s-\s(?P\d+)' - -For more information on writing regular expressions, check out the `python -documentation`_. - -.. _python documentation: https://docs.python.org/3/library/re.html diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index d964c714e..98709c201 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -13,8 +13,6 @@ """Tests for the fromfilename plugin.""" -from dataclasses import dataclass - import pytest from beets.library import Item @@ -255,44 +253,19 @@ def test_parse_album_info(text, matchgroup): m = f._parse_album_info(text) assert matchgroup == m - -@pytest.mark.parametrize( - "patterns,expected", - [ +@pytest.mark.parametrize("string,pattern",[ ( - [ - r""" - (?P\d+(?=[\.\-_]\d))? - # a disc must be followed by punctuation and a digit - [\.\-]{,1} - # disc punctuation - (?P\d+)? - # match the track number - [\.\-_\s]* - # artist separators - (?P.+?(?=[\s*_]?[\.\-by].+))? - # artist match depends on title existing - [\.\-_\s]* - (?Pby)? - # if 'by' is found, artist and title will need to be swapped - [\.\-_\s]* - # title separators - (?P.+)? 
- # match the track title - """, - r"", - r"(?:<invalid)", - r"(.*)", - r"(?P<disc>asda}]", - ], - 1, - ) - ], -) -def test_to_regex(patterns, expected): + "$albumartist - $album ($year) {$comments}", + r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}" + ), + ( + "$", + None + ), + ]) +def test_parse_user_pattern_strings(string,pattern): f = FromFilenamePlugin() - p = f._to_regex(patterns) - assert len(p) == expected + assert f._parse_user_pattern_strings(string) == pattern class TestFromFilename(PluginMixin): @@ -647,7 +620,6 @@ class TestFromFilename(PluginMixin): expected.path = path with self.configure_plugin({"fields": fields}): f = FromFilenamePlugin() - f.config f.filename_task(task, Session()) res = task.items[0] assert res.path == expected.path @@ -658,6 +630,48 @@ class TestFromFilename(PluginMixin): assert res.year == expected.year assert res.title == expected.title - @pytest.mark.parametrize("patterns,expected_item", []) - def test_user_regex(self, patterns, expected_item): - return + @pytest.mark.parametrize("patterns,expected", [ + ( + { + "folder": ["($comments) - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"] + }, + mock_item( + path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + comments="Comment", + albumartist="Album Artist", + album="Album", + artist="Artist", + track=2, + title="Title", + ) + ), + ( + { + "folder": ["[$comments] - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"] + }, + mock_item( + path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + artist="Artist", + track=2, + title="Title", + catalognum="Comment" + ) + ) + ]) + def test_user_patterns(self, patterns, expected): + task = mock_task([mock_item(path=expected.path)]) + with self.configure_plugin({ "patterns": patterns }): + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items[0] + assert res.comments == expected.comments + assert res.path 
== expected.path + assert res.artist == expected.artist + assert res.albumartist == expected.albumartist + assert res.disc == expected.disc + assert res.catalognum == expected.catalognum + assert res.year == expected.year + assert res.title == expected.title +