From 683786a09f313db5b1cea77bfaf2a349481b6f43 Mon Sep 17 00:00:00 2001 From: Henry Date: Thu, 1 Jan 2026 23:44:46 -0800 Subject: [PATCH] Rewrite and extend regex, extend bad titles --- beetsplug/fromfilename.py | 82 +++++++++++++++++++++---------- test/plugins/test_fromfilename.py | 81 ++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 26 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index ad3cd7d0f..ec250a471 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -25,24 +25,35 @@ from beets.library import Item from beets.plugins import BeetsPlugin from beets.util import displayable_path -# Filename field extraction patterns. -PATTERNS = [ - # Useful patterns. - ( - r"^(?P<track>\d+)\.?\s*-\s*(?P<artist>.+?)\s*-\s*(?P<title>.+?)" - r"(\s*-\s*(?P<tag>.*))?$" - ), - r"^(?P<artist>.+?)\s*-\s*(?P<title>.+?)(\s*-\s*(?P<tag>.*))?$", - r"^(?P<track>\d+)\.?[\s_-]+(?P<title>.+)$", - r"^(?P<title>.+) by (?P<artist>.+)$", - r"^(?P<track>\d+).*$", - r"^(?P<title>.+)$", -] +# Filename field extraction patterns +RE_TRACK_INFO = re.compile( + r""" + (?P<disc>\d+(?=[\.\-_]\d))? + # a disc must be followed by punctuation and a digit + [\.\-]{,1} + # disc punctuation + (?P<track>\d+)? + # match the track number + [\.\-_\s]* + # artist separators + (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? + # artist match depends on title existing + [\.\-_\s]* + (?P<by>by)? + # if 'by' is found, artist and title will need to be swapped + [\.\-_\s]* + # title separators + (?P<title>.+)? + # match the track title + """, + re.VERBOSE | re.IGNORECASE, +) -# Titles considered "empty" and in need of replacement.
-BAD_TITLE_PATTERNS = [ - r"^$", -] +# Match the disc names of parent folders +RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE) + +# Matches fields that are empty or only whitespace +RE_BAD_TITLE = re.compile(r"^\s*$") def equal(seq: list[str]): @@ -83,9 +94,8 @@ def bad_title(title: str) -> bool: """Determine whether a given title is "bad" (empty or otherwise meaningless) and in need of replacement. """ - for pat in BAD_TITLE_PATTERNS: - if re.match(pat, title, re.IGNORECASE): - return True + if RE_BAD_TITLE.match(title): + return True return False @@ -117,10 +127,29 @@ class FromFilenamePlugin(BeetsPlugin): names[item] = name # Look for useful information in the filenames. - for pattern in PATTERNS: - self._log.debug(f"Trying pattern: {pattern}") - if d := all_matches(names, pattern): - self._apply_matches(d) + matches: dict[Item, dict[str, str]] = {} + for item, name in names.items(): + m = self.parse_track_info(name) + matches[item] = m + self._apply_matches(matches) + + def parse_track_info(self, text: str) -> dict[str, str]: + m = RE_TRACK_INFO.match(text) + matches = m.groupdict() + # if the phrase "by" is matched, swap + # artist and title + if matches["by"]: + artist = matches["title"] + matches["title"] = matches["artist"] + matches["artist"] = artist + del matches["by"] + # if all fields except track are none + # set title to track - we can't be sure if it's the + # index or track number + if set(matches.values()) == {None, matches["track"]}: + matches["title"] = matches["track"] + + return matches def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None: """Given a mapping from items to field dicts, apply the fields to @@ -149,7 +178,7 @@ class FromFilenamePlugin(BeetsPlugin): return for item in d: - if not item.artist: + if not item.artist and artist: item.artist = artist self._log.info(f"Artist replaced with: {item.artist}") # otherwise, if the pattern contains "title", use that for title_field @@ -165,5 +194,6 @@ class 
FromFilenamePlugin(BeetsPlugin): self._log.info(f"Title replaced with: {item.title}") if "track" in d[item] and item.track == 0: - item.track = int(d[item]["track"]) + if d[item]["track"]: + item.track = int(d[item]["track"]) self._log.info(f"Track replaced with: {item.track}") diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index f13e88aea..5f173a8f5 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -36,6 +36,73 @@ class Task: self.is_album = True +@pytest.mark.parametrize( + "text,matchgroup", + [ + ("3", {"disc": None, "track": "3", "artist": None, "title": "3"}), + ("04", {"disc": None, "track": "04", "artist": None, "title": "04"}), + ("6.", {"disc": None, "track": "6", "artist": None, "title": "6"}), + ("3.5", {"disc": "3", "track": "5", "artist": None, "title": None}), + ("1-02", {"disc": "1", "track": "02", "artist": None, "title": None}), + ("100-4", {"disc": "100", "track": "4", "artist": None, "title": None}), + ( + "04.Title", + {"disc": None, "track": "04", "artist": None, "title": "Title"}, + ), + ( + "5_-_Title", + {"disc": None, "track": "5", "artist": None, "title": "Title"}, + ), + ( + "1-02 Title", + {"disc": "1", "track": "02", "artist": None, "title": "Title"}, + ), + ( + "3.5 - Title", + {"disc": "3", "track": "5", "artist": None, "title": "Title"}, + ), + ( + "5_-_Artist_-_Title", + {"disc": None, "track": "5", "artist": "Artist", "title": "Title"}, + ), + ( + "3-8- Artist-Title", + {"disc": "3", "track": "8", "artist": "Artist", "title": "Title"}, + ), + ( + "4-3 - Artist Name - Title", + { + "disc": "4", + "track": "3", + "artist": "Artist Name", + "title": "Title", + }, + ), + ( + "4-3_-_Artist_Name_-_Title", + { + "disc": "4", + "track": "3", + "artist": "Artist_Name", + "title": "Title", + }, + ), + ( + "6 Title by Artist", + {"disc": None, "track": "6", "artist": "Artist", "title": "Title"}, + ), + ( + "Title", + {"disc": None, "track": None, "artist": None, 
"title": "Title"}, + ), + ], +) +def test_parse_track_info(text, matchgroup): + f = fromfilename.FromFilenamePlugin() + m = f.parse_track_info(text) + assert matchgroup == m + + @pytest.mark.parametrize( "song1, song2", [ @@ -53,6 +120,20 @@ class Task: "Song Two", ), ), + ( + ( + "/tmp/01 The Artist - Song One.m4a", + 1, + "The Artist", + "Song One", + ), + ( + "/tmp/02 The Artist - Song Two.m4a", + 2, + "The Artist", + "Song Two", + ), + ), ( ("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"), ("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"),