mirror of
https://github.com/beetbox/beets.git
synced 2026-02-17 04:43:40 +01:00
Rewrite and extend regex, extend bad titles
This commit is contained in:
parent
af07f45b2d
commit
683786a09f
2 changed files with 137 additions and 26 deletions
|
|
@ -25,24 +25,35 @@ from beets.library import Item
|
|||
from beets.plugins import BeetsPlugin
|
||||
from beets.util import displayable_path
|
||||
|
||||
# Filename field extraction patterns.
|
||||
PATTERNS = [
|
||||
# Useful patterns.
|
||||
(
|
||||
r"^(?P<track>\d+)\.?\s*-\s*(?P<artist>.+?)\s*-\s*(?P<title>.+?)"
|
||||
r"(\s*-\s*(?P<tag>.*))?$"
|
||||
),
|
||||
r"^(?P<artist>.+?)\s*-\s*(?P<title>.+?)(\s*-\s*(?P<tag>.*))?$",
|
||||
r"^(?P<track>\d+)\.?[\s_-]+(?P<title>.+)$",
|
||||
r"^(?P<title>.+) by (?P<artist>.+)$",
|
||||
r"^(?P<track>\d+).*$",
|
||||
r"^(?P<title>.+)$",
|
||||
]
|
||||
# Filename field extraction patterns
|
||||
RE_TRACK_INFO = re.compile(
|
||||
r"""
|
||||
(?P<disc>\d+(?=[\.\-_]\d))?
|
||||
# a disc must be followed by punctuation and a digit
|
||||
[\.\-]{,1}
|
||||
# disc punctuation
|
||||
(?P<track>\d+)?
|
||||
# match the track number
|
||||
[\.\-_\s]*
|
||||
# artist separators
|
||||
(?P<artist>.+?(?=[\s*_]?[\.\-by].+))?
|
||||
# artist match depends on title existing
|
||||
[\.\-_\s]*
|
||||
(?P<by>by)?
|
||||
# if 'by' is found, artist and title will need to be swapped
|
||||
[\.\-_\s]*
|
||||
# title separators
|
||||
(?P<title>.+)?
|
||||
# match the track title
|
||||
""",
|
||||
re.VERBOSE | re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Titles considered "empty" and in need of replacement.
|
||||
BAD_TITLE_PATTERNS = [
|
||||
r"^$",
|
||||
]
|
||||
# Match the disc names of parent folders
|
||||
RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE)
|
||||
|
||||
# Matches fields that are empty or only whitespace
|
||||
RE_BAD_TITLE = re.compile(r"^\s*$")
|
||||
|
||||
|
||||
def equal(seq: list[str]):
|
||||
|
|
@ -83,9 +94,8 @@ def bad_title(title: str) -> bool:
|
|||
"""Determine whether a given title is "bad" (empty or otherwise
|
||||
meaningless) and in need of replacement.
|
||||
"""
|
||||
for pat in BAD_TITLE_PATTERNS:
|
||||
if re.match(pat, title, re.IGNORECASE):
|
||||
return True
|
||||
if RE_BAD_TITLE.match(title):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
|
@ -117,10 +127,29 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
names[item] = name
|
||||
|
||||
# Look for useful information in the filenames.
|
||||
for pattern in PATTERNS:
|
||||
self._log.debug(f"Trying pattern: {pattern}")
|
||||
if d := all_matches(names, pattern):
|
||||
self._apply_matches(d)
|
||||
matches: dict[Item, dict[str, str]] = {}
|
||||
for item, name in names.items():
|
||||
m = self.parse_track_info(name)
|
||||
matches[item] = m
|
||||
self._apply_matches(matches)
|
||||
|
||||
def parse_track_info(self, text: str) -> dict[str, str]:
|
||||
m = RE_TRACK_INFO.match(text)
|
||||
matches = m.groupdict()
|
||||
# if the phrase "by" is matched, swap
|
||||
# artist and title
|
||||
if matches["by"]:
|
||||
artist = matches["title"]
|
||||
matches["title"] = matches["artist"]
|
||||
matches["artist"] = artist
|
||||
del matches["by"]
|
||||
# if all fields except track are none
|
||||
# set title to track - we can't be sure if it's the
|
||||
# index or track number
|
||||
if set(matches.values()) == {None, matches["track"]}:
|
||||
matches["title"] = matches["track"]
|
||||
|
||||
return matches
|
||||
|
||||
def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None:
|
||||
"""Given a mapping from items to field dicts, apply the fields to
|
||||
|
|
@ -149,7 +178,7 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
return
|
||||
|
||||
for item in d:
|
||||
if not item.artist:
|
||||
if not item.artist and artist:
|
||||
item.artist = artist
|
||||
self._log.info(f"Artist replaced with: {item.artist}")
|
||||
# otherwise, if the pattern contains "title", use that for title_field
|
||||
|
|
@ -165,5 +194,6 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
self._log.info(f"Title replaced with: {item.title}")
|
||||
|
||||
if "track" in d[item] and item.track == 0:
|
||||
item.track = int(d[item]["track"])
|
||||
if d[item]["track"]:
|
||||
item.track = int(d[item]["track"])
|
||||
self._log.info(f"Track replaced with: {item.track}")
|
||||
|
|
|
|||
|
|
@ -36,6 +36,73 @@ class Task:
|
|||
self.is_album = True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,matchgroup",
|
||||
[
|
||||
("3", {"disc": None, "track": "3", "artist": None, "title": "3"}),
|
||||
("04", {"disc": None, "track": "04", "artist": None, "title": "04"}),
|
||||
("6.", {"disc": None, "track": "6", "artist": None, "title": "6"}),
|
||||
("3.5", {"disc": "3", "track": "5", "artist": None, "title": None}),
|
||||
("1-02", {"disc": "1", "track": "02", "artist": None, "title": None}),
|
||||
("100-4", {"disc": "100", "track": "4", "artist": None, "title": None}),
|
||||
(
|
||||
"04.Title",
|
||||
{"disc": None, "track": "04", "artist": None, "title": "Title"},
|
||||
),
|
||||
(
|
||||
"5_-_Title",
|
||||
{"disc": None, "track": "5", "artist": None, "title": "Title"},
|
||||
),
|
||||
(
|
||||
"1-02 Title",
|
||||
{"disc": "1", "track": "02", "artist": None, "title": "Title"},
|
||||
),
|
||||
(
|
||||
"3.5 - Title",
|
||||
{"disc": "3", "track": "5", "artist": None, "title": "Title"},
|
||||
),
|
||||
(
|
||||
"5_-_Artist_-_Title",
|
||||
{"disc": None, "track": "5", "artist": "Artist", "title": "Title"},
|
||||
),
|
||||
(
|
||||
"3-8- Artist-Title",
|
||||
{"disc": "3", "track": "8", "artist": "Artist", "title": "Title"},
|
||||
),
|
||||
(
|
||||
"4-3 - Artist Name - Title",
|
||||
{
|
||||
"disc": "4",
|
||||
"track": "3",
|
||||
"artist": "Artist Name",
|
||||
"title": "Title",
|
||||
},
|
||||
),
|
||||
(
|
||||
"4-3_-_Artist_Name_-_Title",
|
||||
{
|
||||
"disc": "4",
|
||||
"track": "3",
|
||||
"artist": "Artist_Name",
|
||||
"title": "Title",
|
||||
},
|
||||
),
|
||||
(
|
||||
"6 Title by Artist",
|
||||
{"disc": None, "track": "6", "artist": "Artist", "title": "Title"},
|
||||
),
|
||||
(
|
||||
"Title",
|
||||
{"disc": None, "track": None, "artist": None, "title": "Title"},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_parse_track_info(text, matchgroup):
|
||||
f = fromfilename.FromFilenamePlugin()
|
||||
m = f.parse_track_info(text)
|
||||
assert matchgroup == m
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"song1, song2",
|
||||
[
|
||||
|
|
@ -53,6 +120,20 @@ class Task:
|
|||
"Song Two",
|
||||
),
|
||||
),
|
||||
(
|
||||
(
|
||||
"/tmp/01 The Artist - Song One.m4a",
|
||||
1,
|
||||
"The Artist",
|
||||
"Song One",
|
||||
),
|
||||
(
|
||||
"/tmp/02 The Artist - Song Two.m4a",
|
||||
2,
|
||||
"The Artist",
|
||||
"Song Two",
|
||||
),
|
||||
),
|
||||
(
|
||||
("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"),
|
||||
("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"),
|
||||
|
|
|
|||
Loading…
Reference in a new issue