Rewrite and extend regex, extend bad titles

This commit is contained in:
Henry 2026-01-01 23:44:46 -08:00
parent af07f45b2d
commit 683786a09f
2 changed files with 137 additions and 26 deletions

View file

@ -25,24 +25,35 @@ from beets.library import Item
from beets.plugins import BeetsPlugin
from beets.util import displayable_path
# Filename field extraction patterns.
PATTERNS = [
# Useful patterns.
(
r"^(?P<track>\d+)\.?\s*-\s*(?P<artist>.+?)\s*-\s*(?P<title>.+?)"
r"(\s*-\s*(?P<tag>.*))?$"
),
r"^(?P<artist>.+?)\s*-\s*(?P<title>.+?)(\s*-\s*(?P<tag>.*))?$",
r"^(?P<track>\d+)\.?[\s_-]+(?P<title>.+)$",
r"^(?P<title>.+) by (?P<artist>.+)$",
r"^(?P<track>\d+).*$",
r"^(?P<title>.+)$",
]
# Filename field extraction patterns
RE_TRACK_INFO = re.compile(
r"""
(?P<disc>\d+(?=[\.\-_]\d))?
# a disc must be followed by punctuation and a digit
[\.\-]{,1}
# disc punctuation
(?P<track>\d+)?
# match the track number
[\.\-_\s]*
# artist separators
(?P<artist>.+?(?=[\s*_]?[\.\-by].+))?
# artist match depends on title existing
[\.\-_\s]*
(?P<by>by)?
# if 'by' is found, artist and title will need to be swapped
[\.\-_\s]*
# title separators
(?P<title>.+)?
# match the track title
""",
re.VERBOSE | re.IGNORECASE,
)
# Titles considered "empty" and in need of replacement.
BAD_TITLE_PATTERNS = [
r"^$",
]
# Match the disc names of parent folders
RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE)
# Matches fields that are empty or only whitespace
RE_BAD_TITLE = re.compile(r"^\s*$")
def equal(seq: list[str]):
@ -83,9 +94,8 @@ def bad_title(title: str) -> bool:
"""Determine whether a given title is "bad" (empty or otherwise
meaningless) and in need of replacement.
"""
for pat in BAD_TITLE_PATTERNS:
if re.match(pat, title, re.IGNORECASE):
return True
if RE_BAD_TITLE.match(title):
return True
return False
@ -117,10 +127,29 @@ class FromFilenamePlugin(BeetsPlugin):
names[item] = name
# Look for useful information in the filenames.
for pattern in PATTERNS:
self._log.debug(f"Trying pattern: {pattern}")
if d := all_matches(names, pattern):
self._apply_matches(d)
matches: dict[Item, dict[str, str]] = {}
for item, name in names.items():
m = self.parse_track_info(name)
matches[item] = m
self._apply_matches(matches)
def parse_track_info(self, text: str) -> dict[str, str]:
m = RE_TRACK_INFO.match(text)
matches = m.groupdict()
# if the phrase "by" is matched, swap
# artist and title
if matches["by"]:
artist = matches["title"]
matches["title"] = matches["artist"]
matches["artist"] = artist
del matches["by"]
# if all fields except track are none
# set title to track - we can't be sure if it's the
# index or track number
if set(matches.values()) == {None, matches["track"]}:
matches["title"] = matches["track"]
return matches
def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None:
"""Given a mapping from items to field dicts, apply the fields to
@ -149,7 +178,7 @@ class FromFilenamePlugin(BeetsPlugin):
return
for item in d:
if not item.artist:
if not item.artist and artist:
item.artist = artist
self._log.info(f"Artist replaced with: {item.artist}")
# otherwise, if the pattern contains "title", use that for title_field
@ -165,5 +194,6 @@ class FromFilenamePlugin(BeetsPlugin):
self._log.info(f"Title replaced with: {item.title}")
if "track" in d[item] and item.track == 0:
item.track = int(d[item]["track"])
if d[item]["track"]:
item.track = int(d[item]["track"])
self._log.info(f"Track replaced with: {item.track}")

View file

@ -36,6 +36,73 @@ class Task:
self.is_album = True
@pytest.mark.parametrize(
"text,matchgroup",
[
("3", {"disc": None, "track": "3", "artist": None, "title": "3"}),
("04", {"disc": None, "track": "04", "artist": None, "title": "04"}),
("6.", {"disc": None, "track": "6", "artist": None, "title": "6"}),
("3.5", {"disc": "3", "track": "5", "artist": None, "title": None}),
("1-02", {"disc": "1", "track": "02", "artist": None, "title": None}),
("100-4", {"disc": "100", "track": "4", "artist": None, "title": None}),
(
"04.Title",
{"disc": None, "track": "04", "artist": None, "title": "Title"},
),
(
"5_-_Title",
{"disc": None, "track": "5", "artist": None, "title": "Title"},
),
(
"1-02 Title",
{"disc": "1", "track": "02", "artist": None, "title": "Title"},
),
(
"3.5 - Title",
{"disc": "3", "track": "5", "artist": None, "title": "Title"},
),
(
"5_-_Artist_-_Title",
{"disc": None, "track": "5", "artist": "Artist", "title": "Title"},
),
(
"3-8- Artist-Title",
{"disc": "3", "track": "8", "artist": "Artist", "title": "Title"},
),
(
"4-3 - Artist Name - Title",
{
"disc": "4",
"track": "3",
"artist": "Artist Name",
"title": "Title",
},
),
(
"4-3_-_Artist_Name_-_Title",
{
"disc": "4",
"track": "3",
"artist": "Artist_Name",
"title": "Title",
},
),
(
"6 Title by Artist",
{"disc": None, "track": "6", "artist": "Artist", "title": "Title"},
),
(
"Title",
{"disc": None, "track": None, "artist": None, "title": "Title"},
),
],
)
def test_parse_track_info(text, matchgroup):
f = fromfilename.FromFilenamePlugin()
m = f.parse_track_info(text)
assert matchgroup == m
@pytest.mark.parametrize(
"song1, song2",
[
@ -53,6 +120,20 @@ class Task:
"Song Two",
),
),
(
(
"/tmp/01 The Artist - Song One.m4a",
1,
"The Artist",
"Song One",
),
(
"/tmp/02 The Artist - Song Two.m4a",
2,
"The Artist",
"Song Two",
),
),
(
("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"),
("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"),