diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 8c905d486..2141b4ed5 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -113,7 +113,7 @@ RE_SPLIT = re.compile(r"[\-\_]+") RE_BRACKETS = re.compile(r"[\(\[\{].*?[\)\]\}]") -class TrackMatches(TypedDict): +class TrackMatch(TypedDict): disc: str | None track: str | None by: NotRequired[str | None] @@ -121,7 +121,7 @@ class TrackMatches(TypedDict): title: str | None -class AlbumMatches(TypedDict): +class AlbumMatch(TypedDict): albumartist: str | None album: str | None year: str | None @@ -188,91 +188,88 @@ class FromFilenamePlugin(BeetsPlugin): return re.sub("}", "}}", re.sub("{", "{{", text)) def filename_task(self, task: ImportTask, session: ImportSession) -> None: - """Examine each item in the task to see if we can extract a title - from the filename. Try to match all filenames to a number of - regexps, starting with the most complex patterns and successively - trying less complex patterns. As soon as all filenames match the - same regex we can make an educated guess of which part of the - regex that contains the title. + """ Examines all files in the given import task for any missing + information it can gather from the file and folder names. + + Once the information has been obtained and checked, it + is applied to the items to improve later metadata lookup. """ # Create the list of items to process - # TODO: If it's a singleton import task, use the .item field - items: list[Item] = [] - if isinstance(task, SingletonImportTask): - item = task.item - else: - items = task.items + items: list[Item] = task.items - # TODO: Switch this to gather data anyway, but only - # update where missing - # Look for suspicious (empty or meaningless) titles. - missing_titles = sum(self._bad_field(i.title) for i in items) + # TODO: Check each of the fields to see if any are missing + # information on the file. + parent_folder, item_filenames = self._get_path_strings(items) - if missing_titles: - # Get the base filenames (no path or extension). - parent_path: str = "" - names: dict[Item, str] = {} - for item in items: - path: Path = Path(displayable_path(item.path)) - name = path.stem - names[item] = name - if not parent_path: - parent_path = path.parent.stem - self._log.debug( - f"Parent Folder: {self._escape(parent_path)}" - ) - - album_matches: AlbumMatches = self._parse_album_info(parent_path) - self._log.debug(album_matches) - # Look for useful information in the filenames. - track_matches: dict[Item, TrackMatches] = {} - for item, name in names.items(): - m = self._parse_track_info(name) - track_matches[item] = m - # Make sure we got the fields right - self._sanity_check_matches(album_matches, track_matches) - self._apply_matches(album_matches, track_matches) + album_matches = self._parse_album_info(parent_folder) + # Look for useful information in the filenames. + track_matches = self._build_track_matches(item_filenames) + # Make sure we got the fields right + self._sanity_check_matches(album_matches, track_matches) + # Apply the information + self._apply_matches(album_matches, track_matches) @staticmethod - def _parse_track_info(text: str) -> TrackMatches: - matches: TrackMatches = { + def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: + parent_folder: str = "" + filenames: dict[Item, str] = {} + for item in items: + path: Path = Path(displayable_path(item.path)) + filename = path.stem + filenames[item] = filename + if not parent_folder: + parent_folder = path.parent.stem + return parent_folder, filenames + + def _build_track_matches(self, + item_filenames: dict[Item, str]) -> dict[Item, TrackMatch]: + track_matches: dict[Item, TrackMatch] = {} + for item, filename in item_filenames.items(): + m = self._parse_track_info(filename) + track_matches[item] = m + return track_matches + + + @staticmethod + def _parse_track_info(text: str) -> TrackMatch: + trackmatch: TrackMatch = { "disc": None, "track": None, "by": None, "artist": None, "title": None, } - m = RE_TRACK_INFO.match(text) - if m: - if disc := m.group("disc"): - matches["disc"] = str(disc) - if track := m.group("track"): - matches["track"] = str(track).strip() - if by := m.group("by"): - matches["by"] = str(by) - if artist := m.group("artist"): - matches["artist"] = str(artist).strip() - if title := m.group("title"): - matches["title"] = str(title).strip() + match = RE_TRACK_INFO.match(text) + assert match is not None + if disc := match.group("disc"): + trackmatch["disc"] = str(disc) + if track := match.group("track"): + trackmatch["track"] = str(track).strip() + if by := match.group("by"): + trackmatch["by"] = str(by) + if artist := match.group("artist"): + trackmatch["artist"] = str(artist).strip() + if title := match.group("title"): + trackmatch["title"] = str(title).strip() # if the phrase "by" is matched, swap artist and title - if matches["by"]: - artist = matches["title"] - matches["title"] = matches["artist"] - matches["artist"] = artist + if trackmatch["by"]: + artist = trackmatch["title"] + trackmatch["title"] = trackmatch["artist"] + trackmatch["artist"] = artist # remove that key - del matches["by"] + del trackmatch["by"] # if all fields except `track` are none # set title to track number as well # we can't be sure if it's actually the track number # or track title - if set(matches.values()) == {None, matches["track"]}: - matches["title"] = matches["track"] + if set(trackmatch.values()) == {None, trackmatch["track"]}: + trackmatch["title"] = trackmatch["track"] - return matches + return trackmatch - def _parse_album_info(self, text: str) -> AlbumMatches: - matches: AlbumMatches = { + def _parse_album_info(self, text: str) -> AlbumMatch: + matches: AlbumMatch = { "albumartist": None, "album": None, "year": None, @@ -313,7 +310,7 @@ class FromFilenamePlugin(BeetsPlugin): return matches def _apply_matches( - self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + self, album_match: AlbumMatch, track_matches: dict[Item, TrackMatch] ) -> None: """Apply all valid matched fields to all items in the match dictionary.""" match = album_match @@ -422,7 +419,7 @@ class FromFilenamePlugin(BeetsPlugin): return text def _sanity_check_matches( - self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + self, album_match: AlbumMatch, track_matches: dict[Item, TrackMatch] ) -> None: """Check to make sure data is coherent between track and album matches. Largely looking to see @@ -430,7 +427,7 @@ class FromFilenamePlugin(BeetsPlugin): identified. """ - def swap_artist_title(tracks: list[TrackMatches]): + def swap_artist_title(tracks: list[TrackMatch]): for track in tracks: artist = track["title"] track["title"] = track["artist"] @@ -447,7 +444,7 @@ class FromFilenamePlugin(BeetsPlugin): # if they do not, try seeing if all the titles match # if all the titles match, swap title and artist fields # If we know that it's a VA album, then we can't assert much from the artists - tracks: list[TrackMatches] = list(track_matches.values()) + tracks: list[TrackMatch] = list(track_matches.values()) album_artist = album_match["albumartist"] one_artist = self._equal_fields(tracks, "artist") one_title = self._equal_fields(tracks, "title") @@ -472,7 +469,7 @@ class FromFilenamePlugin(BeetsPlugin): return @staticmethod - def _equal_fields(dictionaries: list[TrackMatches], field: str) -> bool: + def _equal_fields(dictionaries: list[TrackMatch], field: str) -> bool: """Checks if all values of a field on a dictionary match.""" return len(set(d[field] for d in dictionaries)) <= 1 # type: ignore diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index b1472e3db..d964c714e 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -19,6 +19,7 @@ import pytest from beets.library import Item from beets.test.helper import PluginMixin +from beets.importer.tasks import ImportTask, SingletonImportTask from beetsplug.fromfilename import FromFilenamePlugin @@ -41,10 +42,8 @@ def mock_item(**kwargs): return Item(**{**defaults, **kwargs}) -@dataclass -class Task: - items: list[Item] - is_album: bool = True +def mock_task(items): + return ImportTask(toppath=None, paths=None, items=items) @pytest.mark.parametrize( @@ -408,7 +407,7 @@ class TestFromFilename(PluginMixin): After parsing, compare to the original with the expected attributes defined. """ - task = Task([mock_item(path=expected_item.path)]) + task = mock_task(items=[mock_item(path=expected_item.path)]) f = FromFilenamePlugin() f.filename_task(task, Session()) res = task.items[0] @@ -561,7 +560,7 @@ class TestFromFilename(PluginMixin): After parsing, compare to the expected items. """ - task = Task([mock_item(path=item.path) for item in expected_items]) + task = mock_task([mock_item(path=item.path) for item in expected_items]) f = FromFilenamePlugin() f.filename_task(task, Session()) res = task.items @@ -581,7 +580,15 @@ class TestFromFilename(PluginMixin): assert res[1].year == exp[1].year assert res[1].title == exp[1].title - # TODO: Test with singleton import tasks + def test_singleton_import(self): + task = SingletonImportTask( + toppath=None, + item=mock_item(path="/01 Track.wav") + ) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert task.item.track == 1 + assert task.item.title == "Track" # TODO: Test with items that already have data, or other types of bad data. @@ -636,7 +643,7 @@ class TestFromFilename(PluginMixin): "/Album Artist - Album (2025) [FLAC CD] {CATALOGNUM}/" "1-2 Artist - Track.wav" ) - task = Task([mock_item(path=path)]) + task = mock_task([mock_item(path=path)]) expected.path = path with self.configure_plugin({"fields": fields}): f = FromFilenamePlugin() @@ -651,5 +658,6 @@ class TestFromFilename(PluginMixin): assert res.year == expected.year assert res.title == expected.title - def test_user_regex(self): + @pytest.mark.parametrize("patterns,expected_item", []) + def test_user_regex(self, patterns, expected_item): return