mirror of
https://github.com/beetbox/beets.git
synced 2026-02-17 04:43:40 +01:00
Add album fields parsing, refactored tag updating, expanded testing.
This commit is contained in:
parent
683786a09f
commit
a7a5e1e12a
3 changed files with 663 additions and 152 deletions
|
|
@ -16,10 +16,15 @@
|
|||
(possibly also extract track and artist)
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import typing
|
||||
from datetime import datetime
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import Any, TypedDict
|
||||
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from beets import config
|
||||
from beets.importer import ImportSession, ImportTask
|
||||
from beets.library import Item
|
||||
from beets.plugins import BeetsPlugin
|
||||
|
|
@ -28,24 +33,41 @@ from beets.util import displayable_path
|
|||
# Filename field extraction patterns
|
||||
RE_TRACK_INFO = re.compile(
|
||||
r"""
|
||||
(?P<disc>\d+(?=[\.\-_]\d))?
|
||||
# a disc must be followed by punctuation and a digit
|
||||
[\.\-]{,1}
|
||||
# disc punctuation
|
||||
(?P<track>\d+)?
|
||||
# match the track number
|
||||
[\.\-_\s]*
|
||||
# artist separators
|
||||
(?P<artist>.+?(?=[\s*_]?[\.\-by].+))?
|
||||
# artist match depends on title existing
|
||||
[\.\-_\s]*
|
||||
(?P<by>by)?
|
||||
# if 'by' is found, artist and title will need to be swapped
|
||||
[\.\-_\s]*
|
||||
# title separators
|
||||
(?P<title>.+)?
|
||||
# match the track title
|
||||
""",
|
||||
(?P<disc>\d+(?=[\.\-_]\d))?
|
||||
# a disc must be followed by punctuation and a digit
|
||||
[\.\-]{,1}
|
||||
# disc punctuation
|
||||
(?P<track>\d+)?
|
||||
# match the track number
|
||||
[\.\-_\s]*
|
||||
# artist separators
|
||||
(?P<artist>.+?(?=[\s*_]?[\.\-by].+))?
|
||||
# artist match depends on title existing
|
||||
[\.\-_\s]*
|
||||
(?P<by>by)?
|
||||
# if 'by' is found, artist and title will need to be swapped
|
||||
[\.\-_\s]*
|
||||
# title separators
|
||||
(?P<title>.+)?
|
||||
# match the track title
|
||||
""",
|
||||
re.VERBOSE | re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Catalog number extraction pattern
|
||||
RE_CATALOGNUM = re.compile(
|
||||
r"""
|
||||
[\(\[\{]
|
||||
# starts with a bracket
|
||||
(?!flac|mp3|wav)
|
||||
# does not start with file format
|
||||
(?P<catalognum>[\w\s]+)
|
||||
# actual catalog number
|
||||
(?<!flac|.mp3|.wav)
|
||||
# does not end with file format
|
||||
[\)\]\}]
|
||||
# ends with a bracket
|
||||
""",
|
||||
re.VERBOSE | re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
|
@ -53,25 +75,72 @@ RE_TRACK_INFO = re.compile(
|
|||
RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE)
|
||||
|
||||
# Matches fields that are empty or only whitespace
|
||||
RE_BAD_TITLE = re.compile(r"^\s*$")
|
||||
RE_BAD_FIELD = re.compile(r"^\s*$")
|
||||
|
||||
# First priority for matching a year is a year surrounded
|
||||
# by brackets, dashes, or punctuation
|
||||
RE_YEAR_BRACKETED = re.compile(
|
||||
r"[\(\[\{\-\_]\s*(?P<year>\d{4}).*?[\)\]\}\-\_,]"
|
||||
)
|
||||
|
||||
# Look for a year at the start
|
||||
RE_YEAR_START = re.compile(r"^(?P<year>\d{4})")
|
||||
|
||||
# Look for a year at the end
|
||||
# The end-of-string anchor must follow the digits: with `$` placed first,
# the pattern demands four digits *after* the end and can never match.
RE_YEAR_END = re.compile(r"(?P<year>\d{4})$")
|
||||
|
||||
# Just look for four digits
|
||||
RE_YEAR_ANY = re.compile(r"(?P<year>\d{4})")
|
||||
|
||||
# All year regexp in order of preference
|
||||
YEAR_REGEX = [RE_YEAR_BRACKETED, RE_YEAR_START, RE_YEAR_END, RE_YEAR_ANY]
|
||||
|
||||
RE_MEDIA = re.compile(
|
||||
r"""
|
||||
[\(\[\{].*?
|
||||
((?P<vinyl>vinyl)|
|
||||
(?P<cd>cd)|
|
||||
(?P<web>web)|
|
||||
(?P<cassette>cassette))
|
||||
.*?[\)\]\}]
|
||||
""",
|
||||
re.VERBOSE | re.IGNORECASE,
|
||||
)
|
||||
|
||||
RE_VARIOUS = re.compile(r"va(rious)?(\sartists)?", re.IGNORECASE)
|
||||
|
||||
RE_SPLIT = re.compile(r"[\-\_]+")
|
||||
|
||||
RE_BRACKETS = re.compile(r"[\(\[\{].*?[\)\]\}]")
|
||||
|
||||
|
||||
def equal(seq: list[str]):
|
||||
"""Determine whether a sequence holds identical elements."""
|
||||
return len(set(seq)) <= 1
|
||||
class TrackMatches(TypedDict):
|
||||
disc: str | None
|
||||
track: str | None
|
||||
by: NotRequired[str | None]
|
||||
artist: str | None
|
||||
title: str | None
|
||||
|
||||
|
||||
def equal_fields(matchdict: dict[typing.Any, dict[str, str]], field: str):
|
||||
class AlbumMatches(TypedDict):
|
||||
albumartist: str | None
|
||||
album: str | None
|
||||
year: str | None
|
||||
catalognum: str | None
|
||||
media: str | None
|
||||
|
||||
|
||||
def equal_fields(matchdict: dict[Any, TrackMatches], field: str) -> bool:
|
||||
"""Do all items in `matchdict`, whose values are dictionaries, have
|
||||
the same value for `field`? (If they do, the field is probably not
|
||||
the title.)
|
||||
"""
|
||||
return equal(list(m[field] for m in matchdict.values()))
|
||||
return len(set(m[field] for m in matchdict.values())) <= 1
|
||||
|
||||
|
||||
def all_matches(
|
||||
names: dict[Item, str], pattern: str
|
||||
) -> dict[Item, dict[str, str]] | None:
|
||||
) -> dict[Item, TrackMatches] | None:
|
||||
"""If all the filenames in the item/filename mapping match the
|
||||
pattern, return a dictionary mapping the items to dictionaries
|
||||
giving the value for each named subpattern in the match. Otherwise,
|
||||
|
|
@ -90,20 +159,32 @@ def all_matches(
|
|||
return matches
|
||||
|
||||
|
||||
def bad_title(title: str) -> bool:
|
||||
"""Determine whether a given title is "bad" (empty or otherwise
|
||||
meaningless) and in need of replacement.
|
||||
"""
|
||||
if RE_BAD_TITLE.match(title):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class FromFilenamePlugin(BeetsPlugin):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.config.add(
|
||||
{
|
||||
"fields": [
|
||||
"disc",
|
||||
"track",
|
||||
"title",
|
||||
"artist",
|
||||
"albumartist",
|
||||
"media",
|
||||
"catalognum",
|
||||
]
|
||||
}
|
||||
)
|
||||
self.register_listener("import_task_start", self.filename_task)
|
||||
|
||||
@cached_property
|
||||
def current_year(self) -> int:
|
||||
return datetime.now().year
|
||||
|
||||
@cached_property
|
||||
def fields(self) -> set[str]:
|
||||
return set(self.config["fields"].as_str_seq())
|
||||
|
||||
def filename_task(self, task: ImportTask, session: ImportSession) -> None:
|
||||
"""Examine each item in the task to see if we can extract a title
|
||||
from the filename. Try to match all filenames to a number of
|
||||
|
|
@ -113,87 +194,252 @@ class FromFilenamePlugin(BeetsPlugin):
|
|||
regex that contains the title.
|
||||
"""
|
||||
# Create the list of items to process
|
||||
|
||||
# TODO: If it's a singleton import task, use the .item field
|
||||
items: list[Item] = task.items
|
||||
|
||||
# TODO: Switch this to gather data anyway, but only
|
||||
# update where missing
|
||||
# Look for suspicious (empty or meaningless) titles.
|
||||
missing_titles = sum(bad_title(i.title) for i in items)
|
||||
missing_titles = sum(self._bad_field(i.title) for i in items)
|
||||
|
||||
if missing_titles:
|
||||
# Get the base filenames (no path or extension).
|
||||
parent_path: str = ""
|
||||
names: dict[Item, str] = {}
|
||||
for item in items:
|
||||
path = displayable_path(item.path)
|
||||
name, _ = os.path.splitext(os.path.basename(path))
|
||||
path: Path = Path(displayable_path(item.path))
|
||||
name = path.stem
|
||||
names[item] = name
|
||||
if not parent_path:
|
||||
parent_path = path.parent.stem
|
||||
self._log.debug(f"Parent Path: {parent_path}")
|
||||
|
||||
album_matches: AlbumMatches = self.parse_album_info(parent_path)
|
||||
self._log.debug(album_matches)
|
||||
# Look for useful information in the filenames.
|
||||
matches: dict[Item, dict[str, str]] = {}
|
||||
track_matches: dict[Item, TrackMatches] = {}
|
||||
for item, name in names.items():
|
||||
m = self.parse_track_info(name)
|
||||
matches[item] = m
|
||||
self._apply_matches(matches)
|
||||
track_matches[item] = m
|
||||
self._apply_matches(album_matches, track_matches)
|
||||
|
||||
def parse_track_info(self, text: str) -> dict[str, str]:
|
||||
def parse_track_info(self, text: str) -> TrackMatches:
|
||||
m = RE_TRACK_INFO.match(text)
|
||||
matches = m.groupdict()
|
||||
matches: TrackMatches = m.groupdict()
|
||||
# if the phrase "by" is matched, swap
|
||||
# artist and title
|
||||
if matches["by"]:
|
||||
artist = matches["title"]
|
||||
matches["title"] = matches["artist"]
|
||||
matches["artist"] = artist
|
||||
# remove that key
|
||||
del matches["by"]
|
||||
# if all fields except track are none
|
||||
# set title to track - we can't be sure if it's the
|
||||
# index or track number
|
||||
# if all fields except `track` are none
|
||||
# set title to track number as well
|
||||
# we can't be sure if it's actually the track number
|
||||
# or track title
|
||||
if set(matches.values()) == {None, matches["track"]}:
|
||||
matches["title"] = matches["track"]
|
||||
|
||||
return matches
|
||||
|
||||
def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None:
|
||||
"""Given a mapping from items to field dicts, apply the fields to
|
||||
the objects.
|
||||
"""
|
||||
some_map = list(d.values())[0]
|
||||
keys = some_map.keys()
|
||||
def parse_album_info(self, text: str) -> AlbumMatches:
|
||||
matches: AlbumMatches = {
|
||||
"albumartist": None,
|
||||
"album": None,
|
||||
"year": None,
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
}
|
||||
# Start with the extra fields to make parsing
|
||||
# the album artist and artist field easier
|
||||
year, span = self._parse_year(text)
|
||||
if year:
|
||||
# Remove it from the string if found
|
||||
text = self._mutate_string(text, span)
|
||||
matches["year"] = year
|
||||
|
||||
# Only proceed if the "tag" field is equal across all filenames.
|
||||
if "tag" in keys and not equal_fields(d, "tag"):
|
||||
return
|
||||
# Look for the catalog number, it must be in brackets
|
||||
# It will not contain the filetype, flac, mp3, wav, etc
|
||||
catalognum, span = self._parse_catalognum(text)
|
||||
if catalognum:
|
||||
text = self._mutate_string(text, span)
|
||||
matches["catalognum"] = catalognum
|
||||
# Look for a media type
|
||||
media, span = self._parse_media(text)
|
||||
if media:
|
||||
text = self._mutate_string(text, span)
|
||||
matches["media"] = media
|
||||
|
||||
# Remove anything left within brackets
|
||||
brackets = RE_BRACKETS.search(text)
|
||||
while brackets:
|
||||
span = brackets.span()
|
||||
text = self._mutate_string(text, span)
|
||||
brackets = RE_BRACKETS.search(text)
|
||||
# Remaining text used for album, albumartist
|
||||
album, albumartist = self._parse_album_and_albumartist(text)
|
||||
matches["album"] = album
|
||||
matches["albumartist"] = albumartist
|
||||
|
||||
return matches
|
||||
|
||||
def _parse_album_and_albumartist(
|
||||
self, text
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""Takes the remaining string and splits it along common dividers.
|
||||
Assumes the first field to be the albumartist and the last field to be the
|
||||
album. Checks against various artist fields.
|
||||
"""
|
||||
possible_albumartist = None
|
||||
possible_album = None
|
||||
# What is left we can assume to contain the title and artist
|
||||
remaining = [
|
||||
f for field in RE_SPLIT.split(text) if (f := field.strip())
|
||||
]
|
||||
if remaining:
|
||||
# If two fields remain, assume album artist and album
|
||||
if len(remaining) == 2:
|
||||
possible_albumartist = remaining[0]
|
||||
possible_album = remaining[1]
|
||||
# Look for known album artists
|
||||
# VA, Various, Various Artists will all result in
|
||||
# using the beets VA default for album artist name
|
||||
# assume the artist comes before the title in most situations
|
||||
if RE_VARIOUS.match(possible_album):
|
||||
possible_album = possible_albumartist
|
||||
possible_albumartist = config["va_name"].as_str()
|
||||
elif RE_VARIOUS.match(possible_albumartist):
|
||||
possible_albumartist = config["va_name"].as_str()
|
||||
else:
|
||||
# If one field remains, assume album title
|
||||
possible_album = remaining[0].strip()
|
||||
return possible_album, possible_albumartist
|
||||
|
||||
def _parse_year(self, text: str) -> tuple[str | None, tuple[int, int]]:
|
||||
"""The year will be a four digit number. The search goes
|
||||
through a list of ordered patterns to try and find the year.
|
||||
To be a valid year, it must not be later than the current year.
|
||||
"""
|
||||
year = None
|
||||
span = (0, 0)
|
||||
for exp in YEAR_REGEX:
|
||||
match = exp.search(text)
|
||||
if not match:
|
||||
continue
|
||||
year_candidate = match.group("year")
|
||||
# If the year is matched and not in the future
|
||||
if year_candidate and int(year_candidate) <= self.current_year:
|
||||
year = year_candidate
|
||||
span = match.span()
|
||||
break
|
||||
return year, span
|
||||
|
||||
def _parse_media(self, text: str) -> tuple[str | None, tuple[int, int]]:
|
||||
"""Look for the media type, we are only interested in a few common
|
||||
types - CD, Vinyl, Cassette or WEB. To avoid overreach, in the
|
||||
case of titles containing a medium, only searches for media types
|
||||
within a pair of brackets.
|
||||
"""
|
||||
mappings = {
|
||||
"cd": "CD",
|
||||
"vinyl": "Vinyl",
|
||||
"web": "Digital Media",
|
||||
"cassette": "Cassette",
|
||||
}
|
||||
match = RE_MEDIA.search(text)
|
||||
if match:
|
||||
media = None
|
||||
for key, value in match.groupdict().items():
|
||||
if value:
|
||||
media = mappings[key]
|
||||
return media, match.span()
|
||||
return None, (0, 0)
|
||||
|
||||
def _parse_catalognum(
|
||||
self, text: str
|
||||
) -> tuple[str | None, tuple[int, int]]:
|
||||
match = RE_CATALOGNUM.search(text)
|
||||
# assert that it cannot be mistaken for a media type
|
||||
if match and not RE_MEDIA.match(match[0]):
|
||||
return match.group("catalognum"), match.span()
|
||||
return None, (0, 0)
|
||||
|
||||
def _mutate_string(self, text, span: tuple[int, int]) -> str:
|
||||
"""Replace a matched field with a separator"""
|
||||
start, end = span
|
||||
text = text[:start] + "-" + text[end:]
|
||||
return text
|
||||
|
||||
def _sanity_check_matches(
|
||||
self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches]
|
||||
) -> None:
|
||||
"""Check to make sure data is coherent between
|
||||
track and album matches. Largely looking to see
|
||||
if the artist and album artist fields are properly
|
||||
identified.
|
||||
"""
|
||||
# If the album artist is not various artists
|
||||
# check that all artists, if any, match
|
||||
# if they do not, try seeing if all the titles match
|
||||
# if all the titles match, swap title and artist fields
|
||||
|
||||
# If the suspected title and albumartist fields are not equal
|
||||
# we have ruled out a self titled album
|
||||
# Check if the suspected title appears in the track artists
|
||||
# If so, we should swap the title and albumartist in albummatches
|
||||
|
||||
# If any track title is the same as the album artist
|
||||
# some_map = list(track_matches.values())[0]
|
||||
# keys = some_map.keys()
|
||||
|
||||
# Given both an "artist" and "title" field, assume that one is
|
||||
# *actually* the artist, which must be uniform, and use the other
|
||||
# for the title. This, of course, won't work for VA albums.
|
||||
# Only check for "artist": patterns containing it, also contain "title"
|
||||
if "artist" in keys:
|
||||
if equal_fields(d, "artist"):
|
||||
artist = some_map["artist"]
|
||||
title_field = "title"
|
||||
elif equal_fields(d, "title"):
|
||||
artist = some_map["title"]
|
||||
title_field = "artist"
|
||||
else:
|
||||
# Both vary. Abort.
|
||||
return
|
||||
# if "artist" in keys:
|
||||
# if equal_fields(track_matches, "artist"):
|
||||
# artist = some_map["artist"]
|
||||
# title_field = "title"
|
||||
# elif equal_fields(track_matches, "title"):
|
||||
# artist = some_map["title"]
|
||||
# title_field = "artist"
|
||||
# else:
|
||||
# # Both vary. Abort.
|
||||
# return
|
||||
#
|
||||
# for item in track_matches:
|
||||
# if not item.artist and artist:
|
||||
# item.artist = artist
|
||||
# self._log.info(f"Artist replaced with: {item.artist}")
|
||||
# # otherwise, if the pattern contains "title", use that for title_field
|
||||
|
||||
for item in d:
|
||||
if not item.artist and artist:
|
||||
item.artist = artist
|
||||
self._log.info(f"Artist replaced with: {item.artist}")
|
||||
# otherwise, if the pattern contains "title", use that for title_field
|
||||
elif "title" in keys:
|
||||
title_field = "title"
|
||||
else:
|
||||
title_field = None
|
||||
return
|
||||
|
||||
# Apply the title and track, if any.
|
||||
for item in d:
|
||||
if title_field and bad_title(item.title):
|
||||
item.title = str(d[item][title_field])
|
||||
self._log.info(f"Title replaced with: {item.title}")
|
||||
def _apply_matches(
|
||||
self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches]
|
||||
) -> None:
|
||||
"""Apply all valid matched fields to all items in the match dictionary."""
|
||||
match = album_match
|
||||
for item in track_matches:
|
||||
match.update(track_matches[item])
|
||||
found_data: dict[str, int | str] = {}
|
||||
self._log.debug(f"Attempting keys: {match.keys()}")
|
||||
for key in match.keys():
|
||||
if key in self.fields:
|
||||
old_value = item.get(key)
|
||||
new_value = match[key]
|
||||
if self._bad_field(old_value) and new_value:
|
||||
found_data[key] = new_value
|
||||
self._log.info(f"Item updated with: {found_data.values()}")
|
||||
item.update(found_data)
|
||||
|
||||
if "track" in d[item] and item.track == 0:
|
||||
if d[item]["track"]:
|
||||
item.track = int(d[item]["track"])
|
||||
self._log.info(f"Track replaced with: {item.track}")
|
||||
@staticmethod
|
||||
def _bad_field(field: str | int) -> bool:
|
||||
"""Determine whether a given field value is "bad" (empty or otherwise
|
||||
meaningless) and in need of replacement.
|
||||
"""
|
||||
if isinstance(field, int):
|
||||
return True if field <= 0 else False
|
||||
return True if RE_BAD_FIELD.match(field) else False
|
||||
|
|
|
|||
|
|
@ -5,8 +5,72 @@ The ``fromfilename`` plugin helps to tag albums that are missing tags altogether
|
|||
but where the filenames contain useful information like the artist and title.
|
||||
|
||||
When you attempt to import a track that's missing a title, this plugin will look
|
||||
at the track's filename and guess its track number, title, and artist. These
|
||||
will be used to search in MusicBrainz and match track ordering.
|
||||
at the track's filename and guess its disc, track number, title, and artist.
|
||||
These will be used to search for metadata and match track ordering.
|
||||
|
||||
To use the ``fromfilename`` plugin, enable it in your configuration (see
|
||||
:ref:`using-plugins`).
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
Configuration for ``fromfilename`` allows you to choose what fields the plugin
|
||||
attempts to contribute to files missing information.
|
||||
|
||||
Default
|
||||
~~~~~~~
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
fromfilename:
|
||||
fields:
|
||||
- artist
|
||||
- album
|
||||
- albumartist
|
||||
- catalognum
|
||||
- disc
|
||||
- media
|
||||
- title
|
||||
- track
|
||||
|
||||
Recognized Patterns
|
||||
-------------------
|
||||
|
||||
Examples of paths that the plugin can parse successfully, and the fields
|
||||
retrieved.
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
"/Artist - Album (2025)/03.wav"
|
||||
album: Album
|
||||
albumartist: Artist
|
||||
title: "03"
|
||||
track: 3
|
||||
|
||||
"/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac"
|
||||
artist: Artist
|
||||
album: Album
|
||||
albumartist: Various Artists
|
||||
catalognum: CAT123
|
||||
disc: 2
|
||||
media: Digital Media
|
||||
title: Song One
|
||||
track: 10
|
||||
|
||||
"/1-23.flac"
|
||||
disc: 1
|
||||
track: 23
|
||||
|
||||
"/04. Song.mp3"
|
||||
title: Song
|
||||
track: 4
|
||||
|
||||
"/5_-_My_Artist_-_My_Title.m4a"
|
||||
artist: My_Artist
|
||||
title: My_Title
|
||||
track: 5
|
||||
|
||||
"/8 Song by Artist.wav"
|
||||
artist: Artist
|
||||
title: Song
|
||||
track: 8
|
||||
|
|
|
|||
|
|
@ -13,8 +13,12 @@
|
|||
|
||||
"""Tests for the fromfilename plugin."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
|
||||
from beets.library import Item
|
||||
from beets.test.helper import ConfigMixin
|
||||
from beetsplug import fromfilename
|
||||
|
||||
|
||||
|
|
@ -22,18 +26,25 @@ class Session:
|
|||
pass
|
||||
|
||||
|
||||
class Item:
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
self.track = 0
|
||||
self.artist = ""
|
||||
self.title = ""
|
||||
def mock_item(**kwargs):
|
||||
defaults = dict(
|
||||
title="",
|
||||
artist="",
|
||||
albumartist="",
|
||||
album="",
|
||||
disc=0,
|
||||
track=0,
|
||||
catalognum="",
|
||||
media="",
|
||||
mtime=12345,
|
||||
)
|
||||
return Item(**{**defaults, **kwargs})
|
||||
|
||||
|
||||
@dataclass
|
||||
class Task:
|
||||
def __init__(self, items):
|
||||
self.items = items
|
||||
self.is_album = True
|
||||
items: list[Item]
|
||||
is_album: bool = True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
|
@ -104,77 +115,267 @@ def test_parse_track_info(text, matchgroup):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"song1, song2",
|
||||
"text,matchgroup",
|
||||
[
|
||||
(
|
||||
(
|
||||
"/tmp/01 - The Artist - Song One.m4a",
|
||||
1,
|
||||
"The Artist",
|
||||
"Song One",
|
||||
),
|
||||
(
|
||||
"/tmp/02. - The Artist - Song Two.m4a",
|
||||
2,
|
||||
"The Artist",
|
||||
"Song Two",
|
||||
),
|
||||
# highly unlikely
|
||||
"",
|
||||
{
|
||||
"albumartist": None,
|
||||
"album": None,
|
||||
"year": None,
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
(
|
||||
"/tmp/01 The Artist - Song One.m4a",
|
||||
1,
|
||||
"The Artist",
|
||||
"Song One",
|
||||
),
|
||||
(
|
||||
"/tmp/02 The Artist - Song Two.m4a",
|
||||
2,
|
||||
"The Artist",
|
||||
"Song Two",
|
||||
),
|
||||
"1970",
|
||||
{
|
||||
"albumartist": None,
|
||||
"album": None,
|
||||
"year": "1970",
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"),
|
||||
("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"),
|
||||
"Album Title",
|
||||
{
|
||||
"albumartist": None,
|
||||
"album": "Album Title",
|
||||
"year": None,
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
("/tmp/01 - Song_One.m4a", 1, "", "Song_One"),
|
||||
("/tmp/02. - Song_Two.m4a", 2, "", "Song_Two"),
|
||||
"Artist - Album Title",
|
||||
{
|
||||
"albumartist": "Artist",
|
||||
"album": "Album Title",
|
||||
"year": None,
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
("/tmp/Song One by The Artist.m4a", 0, "The Artist", "Song One"),
|
||||
("/tmp/Song Two by The Artist.m4a", 0, "The Artist", "Song Two"),
|
||||
"Artist - Album Title (2024)",
|
||||
{
|
||||
"albumartist": "Artist",
|
||||
"album": "Album Title",
|
||||
"year": "2024",
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(("/tmp/01.m4a", 1, "", "01"), ("/tmp/02.m4a", 2, "", "02")),
|
||||
(
|
||||
("/tmp/Song One.m4a", 0, "", "Song One"),
|
||||
("/tmp/Song Two.m4a", 0, "", "Song Two"),
|
||||
"Artist - 2024 - Album Title [flac]",
|
||||
{
|
||||
"albumartist": "Artist",
|
||||
"album": "Album Title",
|
||||
"year": "2024",
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
"(2024) Album Title [CATALOGNUM] WEB",
|
||||
# sometimes things are just going to be unparsable
|
||||
{
|
||||
"albumartist": "Album Title",
|
||||
"album": "WEB",
|
||||
"year": "2024",
|
||||
"catalognum": "CATALOGNUM",
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
"{2024} Album Artist - Album Title [INFO-WAV]",
|
||||
{
|
||||
"albumartist": "Album Artist",
|
||||
"album": "Album Title",
|
||||
"year": "2024",
|
||||
"catalognum": None,
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
"VA - Album Title [2025] [CD-FLAC]",
|
||||
{
|
||||
"albumartist": "Various Artists",
|
||||
"album": "Album Title",
|
||||
"year": "2025",
|
||||
"catalognum": None,
|
||||
"media": "CD",
|
||||
},
|
||||
),
|
||||
(
|
||||
"Artist - Album Title 3000 (1998) [FLAC] {CATALOGNUM}",
|
||||
{
|
||||
"albumartist": "Artist",
|
||||
"album": "Album Title 3000",
|
||||
"year": "1998",
|
||||
"catalognum": "CATALOGNUM",
|
||||
"media": None,
|
||||
},
|
||||
),
|
||||
(
|
||||
"various - cd album (2023) [catalognum 123] {vinyl mp3}",
|
||||
{
|
||||
"albumartist": "Various Artists",
|
||||
"album": "cd album",
|
||||
"year": "2023",
|
||||
"catalognum": "catalognum 123",
|
||||
"media": "Vinyl",
|
||||
},
|
||||
),
|
||||
(
|
||||
"[CATALOG567] Album - Various (2020) [WEB-FLAC]",
|
||||
{
|
||||
"albumartist": "Various Artists",
|
||||
"album": "Album",
|
||||
"year": "2020",
|
||||
"catalognum": "CATALOG567",
|
||||
"media": "Digital Media",
|
||||
},
|
||||
),
|
||||
(
|
||||
"Album 3000 {web}",
|
||||
{
|
||||
"albumartist": None,
|
||||
"album": "Album 3000",
|
||||
"year": None,
|
||||
"catalognum": None,
|
||||
"media": "Digital Media",
|
||||
},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_fromfilename(song1, song2):
|
||||
"""
|
||||
Each "song" is a tuple of path, expected track number, expected artist,
|
||||
expected title.
|
||||
|
||||
We use two songs for each test for two reasons:
|
||||
- The plugin needs more than one item to look for uniform strings in paths
|
||||
in order to guess if the string describes an artist or a title.
|
||||
- Sometimes we allow for an optional "." after the track number in paths.
|
||||
"""
|
||||
|
||||
session = Session()
|
||||
item1 = Item(song1[0])
|
||||
item2 = Item(song2[0])
|
||||
task = Task([item1, item2])
|
||||
|
||||
def test_parse_album_info(text, matchgroup):
|
||||
f = fromfilename.FromFilenamePlugin()
|
||||
f.filename_task(task, session)
|
||||
m = f.parse_album_info(text)
|
||||
assert matchgroup == m
|
||||
|
||||
assert task.items[0].track == song1[1]
|
||||
assert task.items[0].artist == song1[2]
|
||||
assert task.items[0].title == song1[3]
|
||||
assert task.items[1].track == song2[1]
|
||||
assert task.items[1].artist == song2[2]
|
||||
assert task.items[1].title == song2[3]
|
||||
|
||||
class TestFromFilename(ConfigMixin):
|
||||
@pytest.mark.parametrize(
|
||||
"expected_item",
|
||||
[
|
||||
mock_item(
|
||||
path="/tmp/01 - The Artist - Song One.m4a",
|
||||
artist="The Artist",
|
||||
track=1,
|
||||
title="Song One",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/01 The Artist - Song One.m4a",
|
||||
artist="The Artist",
|
||||
track=1,
|
||||
title="Song One",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/02 The Artist - Song Two.m4a",
|
||||
artist="The Artist",
|
||||
track=2,
|
||||
title="Song Two",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/01-The_Artist-Song_One.m4a",
|
||||
artist="The_Artist",
|
||||
track=1,
|
||||
title="Song_One",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/02.-The_Artist-Song_Two.m4a",
|
||||
artist="The_Artist",
|
||||
track=2,
|
||||
title="Song_Two",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/01 - Song_One.m4a",
|
||||
track=1,
|
||||
title="Song_One",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/02. - Song_Two.m4a",
|
||||
track=2,
|
||||
title="Song_Two",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/Song One by The Artist.m4a",
|
||||
artist="The Artist",
|
||||
title="Song One",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/Song Two by The Artist.m4a",
|
||||
artist="The Artist",
|
||||
title="Song Two",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/01.m4a",
|
||||
track=1,
|
||||
title="01",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/02.m4a",
|
||||
track=2,
|
||||
title="02",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/Song One.m4a",
|
||||
title="Song One",
|
||||
),
|
||||
mock_item(
|
||||
path="/tmp/Song Two.m4a",
|
||||
title="Song Two",
|
||||
),
|
||||
mock_item(
|
||||
path=(
|
||||
"/tmp/"
|
||||
"[CATALOG567] Album - Various - [WEB-FLAC]"
|
||||
"/2-10 - Artist - Song One.m4a"
|
||||
),
|
||||
album="Album",
|
||||
artist="Artist",
|
||||
track=10,
|
||||
disc=2,
|
||||
albumartist="Various Artists",
|
||||
catalognum="CATALOG567",
|
||||
title="Song One",
|
||||
media="Digital Media",
|
||||
),
|
||||
mock_item(
|
||||
path=(
|
||||
"/tmp/"
|
||||
"[CATALOG567] Album - Various - [WEB-FLAC]"
|
||||
"/03-04 - Other Artist - Song Two.m4a"
|
||||
),
|
||||
album="Album",
|
||||
artist="Other Artist",
|
||||
disc=3,
|
||||
track=4,
|
||||
albumartist="Various Artists",
|
||||
catalognum="CATALOG567",
|
||||
title="Song Two",
|
||||
media="Digital Media",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_fromfilename(self, expected_item):
|
||||
"""
|
||||
Take expected items, create a task with just the paths.
|
||||
|
||||
After parsing, compare to the original with the expected attributes defined.
|
||||
"""
|
||||
task = Task([mock_item(path=expected_item.path)])
|
||||
f = fromfilename.FromFilenamePlugin()
|
||||
f.filename_task(task, Session())
|
||||
res = task.items[0]
|
||||
exp = expected_item
|
||||
assert res.path == exp.path
|
||||
assert res.artist == exp.artist
|
||||
assert res.albumartist == exp.albumartist
|
||||
assert res.disc == exp.disc
|
||||
assert res.catalognum == exp.catalognum
|
||||
assert res.year == exp.year
|
||||
assert res.title == exp.title
|
||||
|
|
|
|||
Loading…
Reference in a new issue