From f4d41482e8e6a5ad4e3d41ea5cd82d2045c03703 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 31 Jan 2025 23:21:35 +0100 Subject: [PATCH 01/48] Fix musicbrainz genres fetching - genres are now called tags - tags need to be in "mb fetch includes" - release-group has them - release has them - and recording as well, but we don't use them - not sure what this outdated check was doing - see musicbrainzngs.VALID_INCLUDES for reference --- beets/autotag/mb.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 90c2013d8..19d26a9f6 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -104,8 +104,11 @@ BROWSE_MAXTRACKS = 500 TRACK_INCLUDES = ["artists", "aliases", "isrcs"] if "work-level-rels" in musicbrainzngs.VALID_INCLUDES["recording"]: TRACK_INCLUDES += ["work-level-rels", "artist-rels"] -if "genres" in musicbrainzngs.VALID_INCLUDES["recording"]: - RELEASE_INCLUDES += ["genres"] +if all( + "tags" in musicbrainzngs.VALID_INCLUDES[entity] + for entity in ("release", "release-group") +): + RELEASE_INCLUDES += ["tags"] def track_url(trackid: str) -> str: @@ -607,8 +610,8 @@ def album_info(release: dict) -> beets.autotag.hooks.AlbumInfo: if config["musicbrainz"]["genres"]: sources = [ - release["release-group"].get("genre-list", []), - release.get("genre-list", []), + release["release-group"].get("tag-list", []), + release.get("tag-list", []), ] genres: Counter[str] = Counter() for source in sources: From 5d96509cfe0a5a39020fac04fbbaa63619b37622 Mon Sep 17 00:00:00 2001 From: Max Goltzsche Date: Thu, 26 Dec 2024 19:25:23 +0100 Subject: [PATCH 02/48] smartplaylist: change encoding of additional fields URL-encode additional item `fields` within generated EXTM3U playlists instead of JSON-encoding them. JSON-encoding the additional fields/attributes made the `EXTINF` line difficult to parse, whereas URL-encoding makes parsing easy because URL-encoded values cannot contain commas, quotation marks or spaces. I introduced the generation of additional EXTM3U item fields earlier this year and I want to correct that now. **Design/definition background:** Unfortunately, I didn't find a clear definition of how additional playlist item attributes should be encoded - apparently there is none. Given that item URIs within an M3U playlist can be URL-encoded already, defining the values of additional attributes to be URL-encoded as well is consistent design. On the web I didn't find any examples of additional EXTM3U item attributes whose value contains a comma, space or quotation mark - only examples that specified numeric IDs and URLs as attribute values. Because the URL attribute examples I found didn't contain URL-encoded characters, and because it is more readable and unproblematic for parsing, I've let the attribute URL-encoding treat `:` and `/` as safe characters. **Breaking change:** While this is a breaking change in theory, in practice it is not: as far as I know, all integrations of the smartplaylist plugin's additional EXTM3U item attribute feature (beets-webm3u) use simple attribute values such as the numeric item ID, whose formatting/encoding is not affected by the change from JSON to URL-encoding. In other words, the change is backward-compatible with the beets-webm3u plugin (which I'll adjust correspondingly after this beets PR is merged).
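A minimal sketch of the round trip this encoding enables, using only the standard library (the helper names and the parsing regex are illustrative, not plugin code):

    from urllib.parse import quote, unquote
    import re

    def encode_attrs(attrs):
        # ':' and '/' are treated as safe so URL-valued attributes stay readable.
        return "".join(f' {k}="{quote(str(v), safe="/:")}"' for k, v in attrs.items())

    def decode_attrs(extinf):
        # Encoded values cannot contain spaces, commas or quotation marks,
        # so a simple regex splits the EXTINF line unambiguously.
        return {k: unquote(v) for k, v in re.findall(r' (\w+)="([^"]*)"', extinf)}

    line = "#EXTINF:805" + encode_attrs({"id": 1931, "genre": "Progressive Rock"})
    # -> '#EXTINF:805 id="1931" genre="Progressive%20Rock"'
    assert decode_attrs(line) == {"id": "1931", "genre": "Progressive Rock"}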
--- beetsplug/smartplaylist.py | 5 +++-- docs/changelog.rst | 2 ++ docs/plugins/smartplaylist.rst | 12 +++++------- test/plugins/test_smartplaylist.py | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/beetsplug/smartplaylist.py b/beetsplug/smartplaylist.py index a3b24b569..d758c0255 100644 --- a/beetsplug/smartplaylist.py +++ b/beetsplug/smartplaylist.py @@ -14,8 +14,8 @@ """Generates smart playlists based on beets queries.""" -import json import os +from urllib.parse import quote from urllib.request import pathname2url from beets import ui @@ -327,7 +327,8 @@ class SmartPlaylistPlugin(BeetsPlugin): if extm3u: attr = [(k, entry.item[k]) for k in keys] al = [ - f" {a[0]}={json.dumps(str(a[1]))}" for a in attr + f" {key}=\"{quote(str(value), safe='/:')}\"" + for key, value in attr ] attrs = "".join(al) comment = "#EXTINF:{}{},{} - {}\n".format( diff --git a/docs/changelog.rst b/docs/changelog.rst index 54d085599..bcb8135b8 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -81,6 +81,8 @@ Other changes: wrong (outdated) commit. Now the tag is created in the same workflow step right after committing the version update. :bug:`5539` +* :doc:`/plugins/smartplaylist`: URL-encode additional item `fields` within generated + EXTM3U playlists instead of JSON-encoding them. 2.2.0 (December 02, 2024) ------------------------- diff --git a/docs/plugins/smartplaylist.rst b/docs/plugins/smartplaylist.rst index af7a09f03..cb697f762 100644 --- a/docs/plugins/smartplaylist.rst +++ b/docs/plugins/smartplaylist.rst @@ -97,27 +97,25 @@ In case you want to export additional fields from the beets database into the generated playlists, you can do so by specifying them within the ``fields`` configuration option and setting the ``output`` option to ``extm3u``. For instance the following configuration exports the ``id`` and ``genre`` -fields: +fields:: smartplaylist: playlist_dir: /data/playlists relative_to: /data/playlists output: extm3u fields: - - id - genre - playlists: - - name: all.m3u query: '' -A resulting ``all.m3u`` file could look as follows: +Values of additional fields are URL-encoded. +A resulting ``all.m3u`` file could look as follows:: #EXTM3U - #EXTINF:805 id="1931" genre="Jazz",Miles Davis - Autumn Leaves - ../music/Albums/Miles Davis/Autumn Leaves/02 Autumn Leaves.mp3 + #EXTINF:805 id="1931" genre="Progressive%20Rock",Led Zeppelin - Stairway to Heaven + ../music/singles/Led Zeppelin/Stairway to Heaven.mp3 To give a usage example, the `webm3u`_ and `Beetstream`_ plugins read the exported ``id`` field, allowing you to serve your local m3u playlists via HTTP. diff --git a/test/plugins/test_smartplaylist.py b/test/plugins/test_smartplaylist.py index a50f3e622..ade745c17 100644 --- a/test/plugins/test_smartplaylist.py +++ b/test/plugins/test_smartplaylist.py @@ -283,7 +283,7 @@ class SmartPlaylistTest(BeetsTestCase): assert ( content == b"#EXTM3U\n" - + b'#EXTINF:300 id="456" genre="Fake Genre",Fake Artist - fake Title\n' + + b'#EXTINF:300 id="456" genre="Fake%20Genre",Fake Artist - fake Title\n' + b"/tagada.mp3\n" ) From 435864cb5013bdb49be4be670c2ebd8f46ca630f Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 13:16:04 +0100 Subject: [PATCH 03/48] Removed import state functions in favor of an import state dataclass. Makes this more readable in my opinion, we also now have typehints for the import state. 
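In essence, the new class bundles the pickled state file behind a context manager. A condensed, self-contained sketch of that pattern (this `StateFile` is illustrative; the actual `ImportState` in the diff below additionally keeps `tagprogress` and `taghistory` as separate attributes):

    import pickle

    class StateFile:
        """Load pickled state on creation; persist it when the context exits."""

        def __init__(self, path):
            self.path = path
            try:
                with open(path, "rb") as f:
                    self.data = pickle.load(f)
            except Exception:
                self.data = {}  # missing or unreadable state file

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            with open(self.path, "wb") as f:
                pickle.dump(self.data, f)

    # Read-only access needs no context manager:
    progress = StateFile("state.pickle").data.get("tagprogress", {})

    # Writes are flushed to disk when the block exits:
    with StateFile("state.pickle") as state:
        state.data["tagprogress"] = progress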
--- beets/importer.py | 367 +++++++++++++++++++++++----------------------- 1 file changed, 182 insertions(+), 185 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index b30e6399b..fcf6835ca 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1,18 +1,3 @@ -# This file is part of beets. -# Copyright 2016, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - - """Provides the basic, interface-agnostic workflow for importing and autotagging music files. """ @@ -23,17 +8,20 @@ import pickle import re import shutil import time +from abc import ABC, abstractmethod from bisect import bisect_left, insort from collections import defaultdict -from contextlib import contextmanager +from dataclasses import dataclass from enum import Enum from tempfile import mkdtemp +from typing import Iterable, Sequence import mediafile from beets import autotag, config, dbcore, library, logging, plugins, util from beets.util import ( MoveOperation, + PathLike, ancestry, displayable_path, normpath, @@ -49,8 +37,7 @@ action = Enum("action", ["SKIP", "ASIS", "TRACKS", "APPLY", "ALBUMS", "RETAG"]) QUEUE_SIZE = 128 SINGLE_ARTIST_THRESH = 0.25 -PROGRESS_KEY = "tagprogress" -HISTORY_KEY = "taghistory" + # Usually flexible attributes are preserved (i.e., not updated) during # reimports. The following two lists (globally) change this behaviour for # certain fields. To alter these lists only when a specific plugin is in use, @@ -80,142 +67,163 @@ class ImportAbortError(Exception): pass -# Utilities. +@dataclass +class ImportState: + """Representing the progress of an import task. + Opens the state file on creation of the class. If you want + to ensure the state is written to disk, you should use the + context manager protocol. -def _open_state(): - """Reads the state file, returning a dictionary.""" - try: - with open(config["statefile"].as_filename(), "rb") as f: - return pickle.load(f) - except Exception as exc: - # The `pickle` module can emit all sorts of exceptions during - # unpickling, including ImportError. We use a catch-all - # exception to avoid enumerating them all (the docs don't even have a - # full list!). - log.debug("state file could not be read: {0}", exc) - return {} + Tagprogress allows long tagging tasks to be resumed when they pause. + Taghistory is a utility for manipulating the "incremental" import log. + This keeps track of all directories that were ever imported, which + allows the importer to only import new stuff. -def _save_state(state): - """Writes the state dictionary out to disk.""" - try: - with open(config["statefile"].as_filename(), "wb") as f: - pickle.dump(state, f) - except OSError as exc: - log.error("state file could not be written: {0}", exc) + Usage + ----- + ``` + # Readonly + progress = ImportState().tagprogress - -# Utilities for reading and writing the beets progress file, which -# allows long tagging tasks to be resumed when they pause (or crash). 
- - -def progress_read(): - state = _open_state() - return state.setdefault(PROGRESS_KEY, {}) - - -@contextmanager -def progress_write(): - state = _open_state() - progress = state.setdefault(PROGRESS_KEY, {}) - yield progress - _save_state(state) - - -def progress_add(toppath, *paths): - """Record that the files under all of the `paths` have been imported - under `toppath`. + # Read and write + with ImportState() as state: + state["key"] = "value" + ``` """ - with progress_write() as state: - imported = state.setdefault(toppath, []) - for path in paths: - # Normally `progress_add` will be called with the path - # argument increasing. This is because of the ordering in - # `albums_in_dir`. We take advantage of that to make the - # code faster - if imported and imported[len(imported) - 1] <= path: - imported.append(path) - else: - insort(imported, path) + + tagprogress: dict + taghistory: set + path: PathLike + + def __init__(self, readonly=False, path: PathLike | None = None): + self.path = path or config["statefile"].as_filename() + self._open() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._save() + + def _open( + self, + ): + try: + with open(self.path, "rb") as f: + state = pickle.load(f) + # Read the states + self.tagprogress = state.get("tagprogress", {}) + self.taghistory = state.get("taghistory", set()) + except Exception as exc: + # The `pickle` module can emit all sorts of exceptions during + # unpickling, including ImportError. We use a catch-all + # exception to avoid enumerating them all (the docs don't even have a + # full list!). + log.debug("state file could not be read: {0}", exc) + + def _save(self): + try: + with open(self.path, "wb") as f: + pickle.dump( + { + "tagprogress": self.tagprogress, + "taghistory": self.taghistory, + }, + f, + ) + except OSError as exc: + log.error("state file could not be written: {0}", exc) + + # -------------------------------- Tagprogress ------------------------------- # + + def progress_add(self, toppath: PathLike, *paths: list[PathLike]): + """Record that the files under all of the `paths` have been imported + under `toppath`. + """ + with self as state: + imported = state.tagprogress.setdefault(toppath, []) + for path in paths: + if imported and imported[-1] <= path: + imported.append(path) + else: + insort(imported, path) + + def progress_has_element(self, toppath: PathLike, path: PathLike) -> bool: + """Return whether `path` has been imported in `toppath`.""" + imported = self.tagprogress.get(toppath, []) + i = bisect_left(imported, path) + return i != len(imported) and imported[i] == path + + def progress_has(self, toppath: PathLike) -> bool: + """Return `True` if there exist paths that have already been + imported under `toppath`. 
+ """ + return toppath in self.tagprogress + + def progress_reset(self, toppath: PathLike): + """Reset the progress for `toppath`.""" + with self as state: + if toppath in state.tagprogress: + del state.tagprogress[toppath] + + # -------------------------------- Taghistory -------------------------------- # + + def history_add(self, paths: list[PathLike]): + """Add the paths to the history.""" + with self as state: + state.taghistory.add(tuple(paths)) -def progress_element(toppath, path): - """Return whether `path` has been imported in `toppath`.""" - state = progress_read() - if toppath not in state: - return False - imported = state[toppath] - i = bisect_left(imported, path) - return i != len(imported) and imported[i] == path - - -def has_progress(toppath): - """Return `True` if there exist paths that have already been - imported under `toppath`. - """ - state = progress_read() - return toppath in state - - -def progress_reset(toppath): - with progress_write() as state: - if toppath in state: - del state[toppath] - - -# Similarly, utilities for manipulating the "incremental" import log. -# This keeps track of all directories that were ever imported, which -# allows the importer to only import new stuff. - - -def history_add(paths): - """Indicate that the import of the album in `paths` is completed and - should not be repeated in incremental imports. - """ - state = _open_state() - if HISTORY_KEY not in state: - state[HISTORY_KEY] = set() - - state[HISTORY_KEY].add(tuple(paths)) - - _save_state(state) - - -def history_get(): - """Get the set of completed path tuples in incremental imports.""" - state = _open_state() - if HISTORY_KEY not in state: - return set() - return state[HISTORY_KEY] - - -# Abstract session class. - - -class ImportSession: +class ImportSession(ABC): """Controls an import action. Subclasses should implement methods to communicate with the user or otherwise make decisions. """ - def __init__(self, lib, loghandler, paths, query): - """Create a session. `lib` is a Library object. `loghandler` is a - logging.Handler. Either `paths` or `query` is non-null and indicates - the source of files to be imported. + logger: logging.Logger + paths: list[bytes] | None + lib: library.Library + + _is_resuming: dict[bytes, bool] + _merged_items: set + _merged_dirs: set + + def __init__( + self, + lib: library.Library, + loghandler: logging.Handler | None, + paths: Iterable[PathLike] | None, + query: dbcore.Query | None, + ): + """Create a session. + + Parameters + ---------- + lib : library.Library + The library instance to which items will be imported. + loghandler : logging.Handler or None + A logging handler to use for the session's logger. If None, a + NullHandler will be used. + paths : os.PathLike or None + The paths to be imported. If None, no paths are specified. + query : dbcore.Query or None + A query to filter items for import. If None, no query is applied. """ self.lib = lib self.logger = self._setup_logging(loghandler) - self.paths = paths self.query = query self._is_resuming = {} self._merged_items = set() self._merged_dirs = set() # Normalize the paths. 
- if self.paths: - self.paths = list(map(normpath, self.paths)) + if paths is not None: + self.paths = list(map(normpath, paths)) + else: + self.paths = None - def _setup_logging(self, loghandler): + def _setup_logging(self, loghandler: logging.Handler | None): logger = logging.getLogger(__name__) logger.propagate = False if not loghandler: @@ -243,9 +251,7 @@ class ImportSession: iconfig["incremental"] = False if iconfig["reflink"]: - iconfig["reflink"] = iconfig["reflink"].as_choice( - ["auto", True, False] - ) + iconfig["reflink"] = iconfig["reflink"].as_choice(["auto", True, False]) # Copy, move, reflink, link, and hardlink are mutually exclusive. if iconfig["move"]: @@ -302,17 +308,21 @@ class ImportSession: elif task.choice_flag is action.SKIP: self.tag_log("skip", paths) + @abstractmethod def should_resume(self, path): - raise NotImplementedError + raise NotImplementedError("Inheriting class must implement `should_resume`") + @abstractmethod def choose_match(self, task): - raise NotImplementedError + raise NotImplementedError("Inheriting class must implement `choose_match`") + @abstractmethod def resolve_duplicate(self, task, found_duplicates): - raise NotImplementedError + raise NotImplementedError("Inheriting class must implement `resolve_duplicate`") + @abstractmethod def choose_item(self, task): - raise NotImplementedError + raise NotImplementedError("Inheriting class must implement `choose_item`") def run(self): """Run the import task.""" @@ -366,12 +376,13 @@ class ImportSession: # Incremental and resumed imports - def already_imported(self, toppath, paths): + def already_imported(self, toppath: PathLike, paths: Sequence[PathLike]): """Returns true if the files belonging to this task have already been imported in a previous session. """ + state = ImportState() if self.is_resuming(toppath) and all( - [progress_element(toppath, p) for p in paths] + [state.progress_has_element(toppath, p) for p in paths] ): return True if self.config["incremental"] and tuple(paths) in self.history_dirs: @@ -379,13 +390,15 @@ class ImportSession: return False + _history_dirs = None + @property def history_dirs(self): - if not hasattr(self, "_history_dirs"): - self._history_dirs = history_get() + if self._history_dirs is None: + self._history_dirs = ImportState().taghistory return self._history_dirs - def already_merged(self, paths): + def already_merged(self, paths: Sequence[PathLike]): """Returns true if all the paths being imported were part of a merge during previous tasks. """ @@ -394,7 +407,7 @@ class ImportSession: return False return True - def mark_merged(self, paths): + def mark_merged(self, paths: Sequence[PathLike]): """Mark paths and directories as merged for future reimport tasks.""" self._merged_items.update(paths) dirs = { @@ -403,30 +416,31 @@ class ImportSession: } self._merged_dirs.update(dirs) - def is_resuming(self, toppath): + def is_resuming(self, toppath: PathLike): """Return `True` if user wants to resume import of this path. You have to call `ask_resume` first to determine the return value. """ - return self._is_resuming.get(toppath, False) + return self._is_resuming.get(normpath(toppath), False) - def ask_resume(self, toppath): + def ask_resume(self, toppath: PathLike): """If import of `toppath` was aborted in an earlier session, ask user if they want to resume the import. Determines the return value of `is_resuming(toppath)`. 
""" - if self.want_resume and has_progress(toppath): + state = ImportState() + if self.want_resume and state.progress_has(toppath): # Either accept immediately or prompt for input to decide. if self.want_resume is True or self.should_resume(toppath): log.warning( "Resuming interrupted import of {0}", - util.displayable_path(toppath), + util.displayable_path(normpath(toppath)), ) - self._is_resuming[toppath] = True + self._is_resuming[normpath(toppath)] = True else: # Clear progress; we're starting from the top. - progress_reset(toppath) + state.progress_reset(toppath) # The importer task class. @@ -528,12 +542,12 @@ class ImportTask(BaseImportTask): finished. """ if self.toppath: - progress_add(self.toppath, *self.paths) + ImportState().progress_add(self.toppath, *self.paths) def save_history(self): """Save the directory in the history for incremental imports.""" if self.paths: - history_add(self.paths) + ImportState().history_add(self.paths) # Logical decisions. @@ -593,9 +607,7 @@ class ImportTask(BaseImportTask): for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug( - "deleting duplicate {0}", util.displayable_path(item.path) - ) + log.debug("deleting duplicate {0}", util.displayable_path(item.path)) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -627,7 +639,8 @@ class ImportTask(BaseImportTask): self.save_progress() if session.config["incremental"] and not ( # Should we skip recording to incremental list? - self.skip and session.config["incremental_skip_later"] + self.skip + and session.config["incremental_skip_later"] ): self.save_history() @@ -684,9 +697,7 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, prop = autotag.tag_album( - self.items, search_ids=self.search_ids - ) + artist, album, prop = autotag.tag_album(self.items, search_ids=self.search_ids) self.cur_artist = artist self.cur_album = album self.candidates = prop.candidates @@ -737,8 +748,7 @@ class ImportTask(BaseImportTask): [i.albumartist or i.artist for i in self.items] ) if freq == len(self.items) or ( - freq > 1 - and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + freq > 1 and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH ): # Single-artist album. changes["albumartist"] = plur_albumartist @@ -832,15 +842,10 @@ class ImportTask(BaseImportTask): self.replaced_albums = defaultdict(list) replaced_album_ids = set() for item in self.imported_items(): - dup_items = list( - lib.items(dbcore.query.BytesQuery("path", item.path)) - ) + dup_items = list(lib.items(dbcore.query.BytesQuery("path", item.path))) self.replaced_items[item] = dup_items for dup_item in dup_items: - if ( - not dup_item.album_id - or dup_item.album_id in replaced_album_ids - ): + if not dup_item.album_id or dup_item.album_id in replaced_album_ids: continue replaced_album = dup_item._cached_album if replaced_album: @@ -893,8 +898,7 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - "Reimported album {}. Preserving attribute ['added']. " - "Path: {}", + "Reimported album {}. Preserving attribute ['added']. " "Path: {}", self.album.id, displayable_path(self.album.path), ) @@ -1094,10 +1098,10 @@ class SentinelImportTask(ImportTask): def save_progress(self): if self.paths is None: # "Done" sentinel. 
- progress_reset(self.toppath) + ImportState().progress_reset(self.toppath) else: # "Directory progress" sentinel for singletons - progress_add(self.toppath, *self.paths) + ImportState().progress_add(self.toppath, *self.paths) def skip(self): return True @@ -1308,9 +1312,7 @@ class ImportTaskFactory: def singleton(self, path): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(path) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(path)) self.skipped += 1 return None @@ -1333,9 +1335,7 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(dirs) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) self.skipped += 1 return None @@ -1364,8 +1364,7 @@ class ImportTaskFactory: if not (self.session.config["move"] or self.session.config["copy"]): log.warning( - "Archive importing requires either " - "'copy' or 'move' to be enabled." + "Archive importing requires either " "'copy' or 'move' to be enabled." ) return @@ -1578,9 +1577,7 @@ def resolve_duplicates(session, task): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug( - "found duplicates: {}".format([o.id for o in found_duplicates]) - ) + log.debug("found duplicates: {}".format([o.id for o in found_duplicates])) # Get the default action to follow from config. duplicate_action = config["import"]["duplicate_action"].as_choice( From c81a2c9b1809c7c615b1f421a73ade63ac8b97e1 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 13:25:25 +0100 Subject: [PATCH 04/48] Using 3.9 unions instead of new 3.10 style unions for typehints --- beets/importer.py | 16 ++++++++-------- test/plugins/test_lyrics.py | 19 +++++++++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index fcf6835ca..af61afda2 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -14,7 +14,7 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum from tempfile import mkdtemp -from typing import Iterable, Sequence +from typing import Dict, Iterable, Optional, Sequence, Union import mediafile @@ -97,7 +97,7 @@ class ImportState: taghistory: set path: PathLike - def __init__(self, readonly=False, path: PathLike | None = None): + def __init__(self, readonly=False, path: Union[PathLike, None] = None): self.path = path or config["statefile"].as_filename() self._open() @@ -182,19 +182,19 @@ class ImportSession(ABC): """ logger: logging.Logger - paths: list[bytes] | None + paths: Union[list[bytes], None] lib: library.Library - _is_resuming: dict[bytes, bool] + _is_resuming: Dict[bytes, bool] _merged_items: set _merged_dirs: set def __init__( self, lib: library.Library, - loghandler: logging.Handler | None, - paths: Iterable[PathLike] | None, - query: dbcore.Query | None, + loghandler: Optional[logging.Handler], + paths: Optional[Sequence[PathLike]], + query: Optional[dbcore.Query], ): """Create a session. 
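For context (not part of the diff): PEP 604 unions such as `X | None` are evaluated when the annotation is evaluated, so on Python 3.9 they can fail at import time, while `typing.Optional`/`Union` work on older versions. A small illustration:

    from typing import Optional, Union

    def f(x: Optional[int] = None) -> Union[int, str]:  # fine on 3.8/3.9
        return x if x is not None else "empty"

    # def g(x: int | None = None): ...
    # On Python 3.9 this raises at definition time:
    #   TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'
    # unless `from __future__ import annotations` defers annotation evaluation.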
@@ -223,7 +223,7 @@ class ImportSession(ABC): else: self.paths = None - def _setup_logging(self, loghandler: logging.Handler | None): + def _setup_logging(self, loghandler: Optional[logging.Handler]): logger = logging.getLogger(__name__) logger.propagate = False if not loghandler: diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index c6d48c3bd..1305a21d3 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -22,7 +22,12 @@ import pytest from beets.library import Item from beets.test.helper import PluginMixin -from beetsplug import lyrics + +try: + from beetsplug import lyrics +except Exception: + pytest.skip("lyrics plugin couldn't be loaded", allow_module_level=True) + from .lyrics_pages import LyricsPage, lyrics_pages @@ -69,9 +74,7 @@ class TestLyricsUtils: ("横山克", "Masaru Yokoyama", ["Masaru Yokoyama"]), ], ) - def test_search_pairs_artists( - self, artist, artist_sort, expected_extra_artists - ): + def test_search_pairs_artists(self, artist, artist_sort, expected_extra_artists): item = Item(artist=artist, artist_sort=artist_sort, title="song") actual_artists = [a for a, _ in lyrics.search_pairs(item)] @@ -96,9 +99,7 @@ class TestLyricsUtils: def test_search_pairs_titles(self, title, expected_extra_titles): item = Item(title=title, artist="A") - actual_titles = { - t: None for _, tit in lyrics.search_pairs(item) for t in tit - } + actual_titles = {t: None for _, tit in lyrics.search_pairs(item) for t in tit} assert list(actual_titles) == [title, *expected_extra_titles] @@ -240,9 +241,7 @@ class LyricsBackendTest(LyricsPluginMixin): @pytest.fixture def lyrics_html(self, lyrics_root_dir, file_name): - return (lyrics_root_dir / f"{file_name}.txt").read_text( - encoding="utf-8" - ) + return (lyrics_root_dir / f"{file_name}.txt").read_text(encoding="utf-8") @pytest.mark.on_lyrics_update From a7ea60356b02526967bc2d83a05c920d60bd2356 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 13:26:23 +0100 Subject: [PATCH 05/48] Init taghistory before loading --- beets/importer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/beets/importer.py b/beets/importer.py index af61afda2..dfc802a38 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -99,6 +99,8 @@ class ImportState: def __init__(self, readonly=False, path: Union[PathLike, None] = None): self.path = path or config["statefile"].as_filename() + self.tagprogress = {} + self.taghistory = set() self._open() def __enter__(self): From 6f2ee5c61451fde9602b695dfdef47d2eda42424 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 13:37:24 +0100 Subject: [PATCH 06/48] Removed abc as the importer class is used directly sometimes... --- beets/importer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index dfc802a38..e72aac514 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -178,7 +178,7 @@ class ImportState: state.taghistory.add(tuple(paths)) -class ImportSession(ABC): +class ImportSession: """Controls an import action. Subclasses should implement methods to communicate with the user or otherwise make decisions. 
""" @@ -310,19 +310,15 @@ class ImportSession(ABC): elif task.choice_flag is action.SKIP: self.tag_log("skip", paths) - @abstractmethod def should_resume(self, path): raise NotImplementedError("Inheriting class must implement `should_resume`") - @abstractmethod def choose_match(self, task): raise NotImplementedError("Inheriting class must implement `choose_match`") - @abstractmethod def resolve_duplicate(self, task, found_duplicates): raise NotImplementedError("Inheriting class must implement `resolve_duplicate`") - @abstractmethod def choose_item(self, task): raise NotImplementedError("Inheriting class must implement `choose_item`") From c83f2e4e7127ae746b4f3d8107e10a4e9170f44b Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 13:45:18 +0100 Subject: [PATCH 07/48] Recreating importstate class to imitate previous code, otherwise we have slightly different behavior. --- beets/importer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index e72aac514..3586445fd 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -378,9 +378,8 @@ class ImportSession: """Returns true if the files belonging to this task have already been imported in a previous session. """ - state = ImportState() if self.is_resuming(toppath) and all( - [state.progress_has_element(toppath, p) for p in paths] + [ImportState().progress_has_element(toppath, p) for p in paths] ): return True if self.config["incremental"] and tuple(paths) in self.history_dirs: @@ -427,8 +426,7 @@ class ImportSession: Determines the return value of `is_resuming(toppath)`. """ - state = ImportState() - if self.want_resume and state.progress_has(toppath): + if self.want_resume and ImportState().progress_has(toppath): # Either accept immediately or prompt for input to decide. if self.want_resume is True or self.should_resume(toppath): log.warning( @@ -438,7 +436,7 @@ class ImportSession: self._is_resuming[normpath(toppath)] = True else: # Clear progress; we're starting from the top. - state.progress_reset(toppath) + ImportState().progress_reset(toppath) # The importer task class. From 09b15aaf524250acd9906e91d521d1e5fefe7755 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 15:08:41 +0100 Subject: [PATCH 08/48] Added type hints for pipeline stage decorators --- beets/util/pipeline.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/beets/util/pipeline.py b/beets/util/pipeline.py index d23b1bd10..e79325fe2 100644 --- a/beets/util/pipeline.py +++ b/beets/util/pipeline.py @@ -31,9 +31,13 @@ To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. """ + import queue import sys from threading import Lock, Thread +from typing import Callable, Generator, Optional, Union + +from typing_extensions import TypeVar, TypeVarTuple, Unpack BUBBLE = "__PIPELINE_BUBBLE__" POISON = "__PIPELINE_POISON__" @@ -149,7 +153,17 @@ def multiple(messages): return MultiMessage(messages) -def stage(func): +A = TypeVarTuple("A") +T = TypeVar("T") +R = TypeVar("R") + + +def stage( + func: Callable[ + [Unpack[A], T], + R, + ], +): """Decorate a function to become a simple stage. 
>>> @stage @@ -163,7 +177,7 @@ def stage(func): [3, 4, 5] """ - def coro(*args): + def coro(*args: Unpack[A]) -> Generator[Union[R, T, None], T, None]: task = None while True: task = yield task @@ -172,7 +186,7 @@ def stage(func): return coro -def mutator_stage(func): +def mutator_stage(func: Callable[[Unpack[A], T], None]): """Decorate a function that manipulates items in a coroutine to become a simple stage. @@ -187,7 +201,7 @@ def mutator_stage(func): [{'x': True}, {'a': False, 'x': True}] """ - def coro(*args): + def coro(*args: Unpack[A]) -> Generator[Optional[T], T, None]: task = None while True: task = yield task @@ -406,9 +420,7 @@ class Pipeline: for i in range(1, queue_count): for coro in self.stages[i]: threads.append( - MiddlePipelineThread( - coro, queues[i - 1], queues[i], threads - ) + MiddlePipelineThread(coro, queues[i - 1], queues[i], threads) ) # Last stage. From 04aa1b8ddbfbc980039a4d820f7e7d3a2011dbf9 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 1 Feb 2025 15:52:11 +0100 Subject: [PATCH 09/48] Added last missing typehints in importer and fixed some typehint issues in util. Also run poe format --- beets/importer.py | 191 +++++++++++++++++++++++++----------- beets/util/__init__.py | 13 +-- beets/util/pipeline.py | 5 +- test/plugins/test_lyrics.py | 12 ++- 4 files changed, 153 insertions(+), 68 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 3586445fd..a81c97df0 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -2,19 +2,20 @@ autotagging music files. """ +from __future__ import annotations + import itertools import os import pickle import re import shutil import time -from abc import ABC, abstractmethod from bisect import bisect_left, insort from collections import defaultdict from dataclasses import dataclass from enum import Enum from tempfile import mkdtemp -from typing import Dict, Iterable, Optional, Sequence, Union +from typing import Callable, Dict, Iterable, Optional, Sequence, Union, cast import mediafile @@ -98,7 +99,7 @@ class ImportState: path: PathLike def __init__(self, readonly=False, path: Union[PathLike, None] = None): - self.path = path or config["statefile"].as_filename() + self.path = path or cast(PathLike, config["statefile"].as_filename()) self.tagprogress = {} self.taghistory = set() self._open() @@ -140,7 +141,7 @@ class ImportState: # -------------------------------- Tagprogress ------------------------------- # - def progress_add(self, toppath: PathLike, *paths: list[PathLike]): + def progress_add(self, toppath: PathLike, *paths: PathLike): """Record that the files under all of the `paths` have been imported under `toppath`. """ @@ -253,7 +254,9 @@ class ImportSession: iconfig["incremental"] = False if iconfig["reflink"]: - iconfig["reflink"] = iconfig["reflink"].as_choice(["auto", True, False]) + iconfig["reflink"] = iconfig["reflink"].as_choice( + ["auto", True, False] + ) # Copy, move, reflink, link, and hardlink are mutually exclusive. 
if iconfig["move"]: @@ -311,16 +314,24 @@ class ImportSession: self.tag_log("skip", paths) def should_resume(self, path): - raise NotImplementedError("Inheriting class must implement `should_resume`") + raise NotImplementedError( + "Inheriting class must implement `should_resume`" + ) def choose_match(self, task): - raise NotImplementedError("Inheriting class must implement `choose_match`") + raise NotImplementedError( + "Inheriting class must implement `choose_match`" + ) def resolve_duplicate(self, task, found_duplicates): - raise NotImplementedError("Inheriting class must implement `resolve_duplicate`") + raise NotImplementedError( + "Inheriting class must implement `resolve_duplicate`" + ) def choose_item(self, task): - raise NotImplementedError("Inheriting class must implement `choose_item`") + raise NotImplementedError( + "Inheriting class must implement `choose_item`" + ) def run(self): """Run the import task.""" @@ -448,7 +459,16 @@ class BaseImportTask: Tasks flow through the importer pipeline. Each stage can update them.""" - def __init__(self, toppath, paths, items): + toppath: Optional[PathLike] + paths: list[PathLike] + items: list[library.Item] + + def __init__( + self, + toppath: Optional[PathLike], + paths: Optional[Iterable[PathLike]], + items: Optional[Iterable[library.Item]], + ): """Create a task. The primary fields that define a task are: * `toppath`: The user-specified base directory that contains the @@ -466,8 +486,8 @@ class BaseImportTask: These fields should not change after initialization. """ self.toppath = toppath - self.paths = paths - self.items = items + self.paths = list(paths) if paths is not None else [] + self.items = list(items) if items is not None else [] class ImportTask(BaseImportTask): @@ -514,9 +534,14 @@ class ImportTask(BaseImportTask): self.is_album = True self.search_ids = [] # user-supplied candidate IDs. - def set_choice(self, choice): + def set_choice( + self, choice: Union[action, autotag.AlbumMatch, autotag.TrackMatch] + ): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. + + Album and trackmatch are implemented as tuples, so we can't + use isinstance to check for them. """ # Not part of the task structure: assert choice != action.APPLY # Only used internally. 
@@ -566,7 +591,7 @@ class ImportTask(BaseImportTask): if self.choice_flag in (action.ASIS, action.RETAG): likelies, consensus = autotag.current_metadata(self.items) return likelies - elif self.choice_flag is action.APPLY: + elif self.choice_flag is action.APPLY and self.match: return self.match.info.copy() assert False @@ -578,13 +603,19 @@ class ImportTask(BaseImportTask): """ if self.choice_flag in (action.ASIS, action.RETAG): return list(self.items) - elif self.choice_flag == action.APPLY: + elif self.choice_flag == action.APPLY and isinstance( + self.match, autotag.AlbumMatch + ): return list(self.match.mapping.keys()) else: assert False def apply_metadata(self): """Copy metadata from match info to the items.""" + assert isinstance( + self.match, autotag.AlbumMatch + ), "apply_metadata() only works for albums" + if config["import"]["from_scratch"]: for item in self.match.mapping: item.clear() @@ -603,7 +634,9 @@ class ImportTask(BaseImportTask): for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug("deleting duplicate {0}", util.displayable_path(item.path)) + log.debug( + "deleting duplicate {0}", util.displayable_path(item.path) + ) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -635,8 +668,7 @@ class ImportTask(BaseImportTask): self.save_progress() if session.config["incremental"] and not ( # Should we skip recording to incremental list? - self.skip - and session.config["incremental_skip_later"] + self.skip and session.config["incremental_skip_later"] ): self.save_history() @@ -663,7 +695,7 @@ class ImportTask(BaseImportTask): for old_path in self.old_paths: # Only delete files that were actually copied. if old_path not in new_paths: - util.remove(syspath(old_path), False) + util.remove(old_path, False) self.prune(old_path) # When moving, prune empty directories containing the original files. @@ -671,7 +703,7 @@ class ImportTask(BaseImportTask): for old_path in self.old_paths: self.prune(old_path) - def _emit_imported(self, lib): + def _emit_imported(self, lib: library.Library): plugins.send("album_imported", lib=lib, album=self.album) def handle_created(self, session): @@ -693,7 +725,9 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, prop = autotag.tag_album(self.items, search_ids=self.search_ids) + artist, album, prop = autotag.tag_album( + self.items, search_ids=self.search_ids + ) self.cur_artist = artist self.cur_album = album self.candidates = prop.candidates @@ -713,7 +747,9 @@ class ImportTask(BaseImportTask): # Construct a query to find duplicates with this metadata. We # use a temporary Album object to generate any computed fields. tmp_album = library.Album(lib, **info) - keys = config["import"]["duplicate_keys"]["album"].as_str_seq() + keys = cast( + list[str], config["import"]["duplicate_keys"]["album"].as_str_seq() + ) dup_query = tmp_album.duplicates_query(keys) # Don't count albums with the same files as duplicates. @@ -744,7 +780,8 @@ class ImportTask(BaseImportTask): [i.albumartist or i.artist for i in self.items] ) if freq == len(self.items) or ( - freq > 1 and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + freq > 1 + and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH ): # Single-artist album. 
changes["albumartist"] = plur_albumartist @@ -770,7 +807,12 @@ class ImportTask(BaseImportTask): for item in self.items: item.update(changes) - def manipulate_files(self, operation=None, write=False, session=None): + def manipulate_files( + self, + operation: Optional[MoveOperation] = None, + write=False, + session: Optional[ImportSession] = None, + ): """Copy, move, link, hardlink or reflink (depending on `operation`) the files as well as write metadata. @@ -778,11 +820,12 @@ class ImportTask(BaseImportTask): If `write` is `True` metadata is written to the files. """ + assert session is not None, "session must be provided" items = self.imported_items() # Save the original paths of all items for deletion and pruning # in the next step (finalization). - self.old_paths = [item.path for item in items] + self.old_paths: list[PathLike] = [item.path for item in items] for item in items: if operation is not None: # In copy and link modes, treat re-imports specially: @@ -820,7 +863,9 @@ class ImportTask(BaseImportTask): self.remove_replaced(lib) self.album = lib.add_album(self.imported_items()) - if self.choice_flag == action.APPLY: + if self.choice_flag == action.APPLY and isinstance( + self.match, autotag.AlbumMatch + ): # Copy album flexible fields to the DB # TODO: change the flow so we create the `Album` object earlier, # and we can move this into `self.apply_metadata`, just like @@ -830,25 +875,30 @@ class ImportTask(BaseImportTask): self.reimport_metadata(lib) - def record_replaced(self, lib): + def record_replaced(self, lib: library.Library): """Records the replaced items and albums in the `replaced_items` and `replaced_albums` dictionaries. """ self.replaced_items = defaultdict(list) - self.replaced_albums = defaultdict(list) + self.replaced_albums: dict[PathLike, library.Album] = defaultdict() replaced_album_ids = set() for item in self.imported_items(): - dup_items = list(lib.items(dbcore.query.BytesQuery("path", item.path))) + dup_items = list( + lib.items(dbcore.query.BytesQuery("path", item.path)) + ) self.replaced_items[item] = dup_items for dup_item in dup_items: - if not dup_item.album_id or dup_item.album_id in replaced_album_ids: + if ( + not dup_item.album_id + or dup_item.album_id in replaced_album_ids + ): continue replaced_album = dup_item._cached_album if replaced_album: replaced_album_ids.add(dup_item.album_id) self.replaced_albums[replaced_album.path] = replaced_album - def reimport_metadata(self, lib): + def reimport_metadata(self, lib: library.Library): """For reimports, preserves metadata for reimported items and albums. """ @@ -894,7 +944,8 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - "Reimported album {}. Preserving attribute ['added']. " "Path: {}", + "Reimported album {}. Preserving attribute ['added']. 
" + "Path: {}", self.album.id, displayable_path(self.album.path), ) @@ -973,14 +1024,14 @@ class ImportTask(BaseImportTask): util.prune_dirs( os.path.dirname(filename), self.toppath, - clutter=config["clutter"].as_str_seq(), + clutter=cast(list[str], config["clutter"].as_str_seq()), ) class SingletonImportTask(ImportTask): """ImportTask for a single track that is not associated to an album.""" - def __init__(self, toppath, item): + def __init__(self, toppath: Optional[PathLike], item: library.Item): super().__init__(toppath, [item.path], [item]) self.item = item self.is_album = False @@ -995,13 +1046,17 @@ class SingletonImportTask(ImportTask): assert self.choice_flag in (action.ASIS, action.RETAG, action.APPLY) if self.choice_flag in (action.ASIS, action.RETAG): return dict(self.item) - elif self.choice_flag is action.APPLY: + elif self.choice_flag is action.APPLY and self.match: return self.match.info.copy() + assert False def imported_items(self): return [self.item] def apply_metadata(self): + assert isinstance( + self.match, autotag.TrackMatch + ), "apply_metadata() only works for tracks" autotag.apply_item_metadata(self.item, self.match.info) def _emit_imported(self, lib): @@ -1022,7 +1077,9 @@ class SingletonImportTask(ImportTask): # Query for existing items using the same metadata. We use a # temporary `Item` object to generate any computed fields. tmp_item = library.Item(lib, **info) - keys = config["import"]["duplicate_keys"]["item"].as_str_seq() + keys = cast( + list[str], config["import"]["duplicate_keys"]["item"].as_str_seq() + ) dup_query = tmp_item.duplicates_query(keys) found_items = [] @@ -1092,23 +1149,24 @@ class SentinelImportTask(ImportTask): pass def save_progress(self): - if self.paths is None: + if self.paths is None and self.toppath: # "Done" sentinel. ImportState().progress_reset(self.toppath) - else: + elif self.toppath: # "Directory progress" sentinel for singletons ImportState().progress_add(self.toppath, *self.paths) - def skip(self): + @property + def skip(self) -> bool: return True def set_choice(self, choice): raise NotImplementedError - def cleanup(self, **kwargs): + def cleanup(self, copy=False, delete=False, move=False): pass - def _emit_imported(self, session): + def _emit_imported(self, lib): pass @@ -1152,7 +1210,7 @@ class ArchiveImportTask(SentinelImportTask): implements the same interface as `tarfile.TarFile`. """ if not hasattr(cls, "_handlers"): - cls._handlers = [] + cls._handlers: list[tuple[Callable, ...]] = [] from zipfile import ZipFile, is_zipfile cls._handlers.append((is_zipfile, ZipFile)) @@ -1174,9 +1232,9 @@ class ArchiveImportTask(SentinelImportTask): return cls._handlers - def cleanup(self, **kwargs): + def cleanup(self, copy=False, delete=False, move=False): """Removes the temporary directory the archive was extracted to.""" - if self.extracted: + if self.extracted and self.toppath: log.debug( "Removing extracted directory: {0}", displayable_path(self.toppath), @@ -1187,10 +1245,18 @@ class ArchiveImportTask(SentinelImportTask): """Extracts the archive to a temporary directory and sets `toppath` to that directory. 
""" + assert self.toppath is not None, "toppath must be set" + + handler_class = None for path_test, handler_class in self.handlers(): if path_test(os.fsdecode(self.toppath)): break + if handler_class is None: + raise ValueError( + "No handler found for archive: {0}".format(self.toppath) + ) + extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") try: @@ -1268,7 +1334,7 @@ class ImportTaskFactory: # for archive imports, send the archive task instead (to remove # the extracted directory). if self.is_archive: - yield archive_task + yield archive_task # type: ignore else: yield self.sentinel() @@ -1308,7 +1374,9 @@ class ImportTaskFactory: def singleton(self, path): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): - log.debug("Skipping previously-imported path: {0}", displayable_path(path)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(path) + ) self.skipped += 1 return None @@ -1331,7 +1399,9 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(dirs) + ) self.skipped += 1 return None @@ -1360,7 +1430,8 @@ class ImportTaskFactory: if not (self.session.config["move"] or self.session.config["copy"]): log.warning( - "Archive importing requires either " "'copy' or 'move' to be enabled." + "Archive importing requires either " + "'copy' or 'move' to be enabled." ) return @@ -1473,7 +1544,7 @@ def query_tasks(session): @pipeline.mutator_stage -def lookup_candidates(session, task): +def lookup_candidates(session: ImportSession, task: ImportTask): """A coroutine for performing the initial MusicBrainz lookup for an album. It accepts lists of Items and yields (items, cur_artist, cur_album, candidates, rec) tuples. If no match @@ -1495,7 +1566,7 @@ def lookup_candidates(session, task): @pipeline.stage -def user_query(session, task): +def user_query(session: ImportSession, task: ImportTask): """A coroutine for interfacing with the user about the tagging process. @@ -1528,7 +1599,9 @@ def user_query(session, task): yield SentinelImportTask(task.toppath, task.paths) return _extend_pipeline( - emitter(task), lookup_candidates(session), user_query(session) + emitter(task), + lookup_candidates(session), + user_query(session), # type: ignore # Recursion in decorators ) # As albums: group items by albums and create task for each album @@ -1537,7 +1610,7 @@ def user_query(session, task): [task], group_albums(session), lookup_candidates(session), - user_query(session), + user_query(session), # type: ignore # Recursion in decorators ) resolve_duplicates(session, task) @@ -1559,7 +1632,9 @@ def user_query(session, task): ) return _extend_pipeline( - [merged_task], lookup_candidates(session), user_query(session) + [merged_task], + lookup_candidates(session), + user_query(session), # type: ignore # Recursion in decorators ) apply_choice(session, task) @@ -1573,7 +1648,9 @@ def resolve_duplicates(session, task): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug("found duplicates: {}".format([o.id for o in found_duplicates])) + log.debug( + "found duplicates: {}".format([o.id for o in found_duplicates]) + ) # Get the default action to follow from config. 
duplicate_action = config["import"]["duplicate_action"].as_choice( @@ -1697,7 +1774,7 @@ def manipulate_files(session, task): @pipeline.stage -def log_files(session, task): +def log_files(session: ImportSession, task: ImportTask): """A coroutine (pipeline stage) to log each file to be imported.""" if isinstance(task, SingletonImportTask): log.info("Singleton: {0}", displayable_path(task.item["path"])) @@ -1753,8 +1830,8 @@ def albums_in_dir(path): containing any media files is an album. """ collapse_pat = collapse_paths = collapse_items = None - ignore = config["ignore"].as_str_seq() - ignore_hidden = config["ignore_hidden"].get(bool) + ignore = cast(list[str], config["ignore"].as_str_seq()) + ignore_hidden = cast(bool, config["ignore_hidden"].get(bool)) for root, dirs, files in sorted_walk( path, ignore=ignore, ignore_hidden=ignore_hidden, logger=log diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 32a63b216..e13b67604 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -198,8 +198,8 @@ def ancestry(path: AnyStr) -> list[AnyStr]: def sorted_walk( - path: AnyStr, - ignore: Sequence[bytes] = (), + path: PathLike, + ignore: Sequence[PathLike] = (), ignore_hidden: bool = False, logger: Logger | None = None, ) -> Iterator[tuple[bytes, Sequence[bytes], Sequence[bytes]]]: @@ -297,8 +297,8 @@ def fnmatch_all(names: Sequence[bytes], patterns: Sequence[bytes]) -> bool: def prune_dirs( - path: bytes, - root: bytes | None = None, + path: PathLike, + root: PathLike | None = None, clutter: Sequence[str] = (".DS_Store", "Thumbs.db"), ): """If path is an empty directory, then remove it. Recursively remove @@ -419,12 +419,13 @@ PATH_SEP: bytes = bytestring_path(os.sep) def displayable_path( - path: BytesOrStr | tuple[BytesOrStr, ...], separator: str = "; " + path: PathLike | Iterable[PathLike], separator: str = "; " ) -> str: """Attempts to decode a bytestring path to a unicode object for the purpose of displaying it to the user. If the `path` argument is a list or a tuple, the elements are joined with `separator`. """ + if isinstance(path, (list, tuple)): return separator.join(displayable_path(p) for p in path) elif isinstance(path, str): @@ -472,7 +473,7 @@ def samefile(p1: bytes, p2: bytes) -> bool: return False -def remove(path: bytes, soft: bool = True): +def remove(path: PathLike, soft: bool = True): """Remove the file. If `soft`, then no error will be raised if the file does not exist. """ diff --git a/beets/util/pipeline.py b/beets/util/pipeline.py index e79325fe2..5387186b4 100644 --- a/beets/util/pipeline.py +++ b/beets/util/pipeline.py @@ -31,7 +31,6 @@ To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. """ - import queue import sys from threading import Lock, Thread @@ -420,7 +419,9 @@ class Pipeline: for i in range(1, queue_count): for coro in self.stages[i]: threads.append( - MiddlePipelineThread(coro, queues[i - 1], queues[i], threads) + MiddlePipelineThread( + coro, queues[i - 1], queues[i], threads + ) ) # Last stage. 
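The `bytes` to `PathLike` widening in `beets.util` above mainly relaxes the annotations; the runtime behavior of these helpers was already permissive. A small illustration (assuming `beets.util.PathLike` covers both `str` and `bytes` paths, per its use in the diff):

    from beets import util

    # remove() is annotated PathLike now, so str paths type-check without
    # first converting to the bytestring form beets uses internally:
    util.remove("/tmp/does-not-exist.mp3", soft=True)  # soft: no error if missing

    # displayable_path() accepts a single path or an iterable of mixed
    # representations, joined with "; ":
    print(util.displayable_path([b"/a.mp3", "/b.mp3"]))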
diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 1305a21d3..fd6112d7c 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -74,7 +74,9 @@ class TestLyricsUtils: ("横山克", "Masaru Yokoyama", ["Masaru Yokoyama"]), ], ) - def test_search_pairs_artists(self, artist, artist_sort, expected_extra_artists): + def test_search_pairs_artists( + self, artist, artist_sort, expected_extra_artists + ): item = Item(artist=artist, artist_sort=artist_sort, title="song") actual_artists = [a for a, _ in lyrics.search_pairs(item)] @@ -99,7 +101,9 @@ class TestLyricsUtils: def test_search_pairs_titles(self, title, expected_extra_titles): item = Item(title=title, artist="A") - actual_titles = {t: None for _, tit in lyrics.search_pairs(item) for t in tit} + actual_titles = { + t: None for _, tit in lyrics.search_pairs(item) for t in tit + } assert list(actual_titles) == [title, *expected_extra_titles] @@ -241,7 +245,9 @@ class LyricsBackendTest(LyricsPluginMixin): @pytest.fixture def lyrics_html(self, lyrics_root_dir, file_name): - return (lyrics_root_dir / f"{file_name}.txt").read_text(encoding="utf-8") + return (lyrics_root_dir / f"{file_name}.txt").read_text( + encoding="utf-8" + ) @pytest.mark.on_lyrics_update From ed92f9b997af93b5d4bb0fca54d61d9ee4642dad Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 3 Feb 2025 11:37:33 +0100 Subject: [PATCH 10/48] Do not skip tests in ci. The return type of the stage decorator should in theory be `T|None` but the return of task types is not consistent in its usage. Would need some bigger changes for which I'm not ready at the moment. --- beets/util/pipeline.py | 8 ++++---- test/plugins/test_lyrics.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/beets/util/pipeline.py b/beets/util/pipeline.py index 5387186b4..d797dea13 100644 --- a/beets/util/pipeline.py +++ b/beets/util/pipeline.py @@ -160,7 +160,7 @@ R = TypeVar("R") def stage( func: Callable[ [Unpack[A], T], - R, + Optional[R], ], ): """Decorate a function to become a simple stage. @@ -176,7 +176,7 @@ def stage( [3, 4, 5] """ - def coro(*args: Unpack[A]) -> Generator[Union[R, T, None], T, None]: + def coro(*args: Unpack[A]) -> Generator[Union[R, T, None], T, R]: task = None while True: task = yield task @@ -185,7 +185,7 @@ def stage( return coro -def mutator_stage(func: Callable[[Unpack[A], T], None]): +def mutator_stage(func: Callable[[Unpack[A], T], R]): """Decorate a function that manipulates items in a coroutine to become a simple stage. 
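Read together, these hunks type the stage factory end to end: the coroutine binds every leading argument, and each `send()` maps one task. A standalone reduction of what the `TypeVarTuple` annotations express (illustrative; it mirrors rather than imports the real decorator):

    from typing import Callable, Generator, Optional, TypeVar, Union
    from typing_extensions import TypeVarTuple, Unpack

    A = TypeVarTuple("A")
    T = TypeVar("T")
    R = TypeVar("R")

    def stage(func: Callable[[Unpack[A], T], Optional[R]]):
        # *args binds the leading arguments; the final task arrives via send().
        def coro(*args: Unpack[A]) -> Generator[Union[R, T, None], T, None]:
            task = None
            while True:
                task = yield task
                task = func(*args, task)
        return coro

    @stage
    def add(n: int, task: int) -> int:
        return task + n

    g = add(2)  # bind n=2; returns the coroutine
    next(g)     # prime it (yields None once)
    assert g.send(1) == 3
    assert g.send(5) == 7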
@@ -200,7 +200,7 @@ def mutator_stage(func: Callable[[Unpack[A], T], None]): [{'x': True}, {'a': False, 'x': True}] """ - def coro(*args: Unpack[A]) -> Generator[Optional[T], T, None]: + def coro(*args: Unpack[A]) -> Generator[Union[T, None], T, None]: task = None while True: task = yield task diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index fd6112d7c..a3c640109 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -14,6 +14,8 @@ """Tests for the 'lyrics' plugin.""" +import importlib.util +import os import re from functools import partial from http import HTTPStatus @@ -22,15 +24,15 @@ import pytest from beets.library import Item from beets.test.helper import PluginMixin - -try: - from beetsplug import lyrics -except Exception: - pytest.skip("lyrics plugin couldn't be loaded", allow_module_level=True) - +from beetsplug import lyrics from .lyrics_pages import LyricsPage, lyrics_pages +github_ci = os.environ.get("GITHUB_ACTIONS") == "true" +if not github_ci and not importlib.util.find_spec("langdetect"): + pytest.skip("langdetect isn't available", allow_module_level=True) + + PHRASE_BY_TITLE = { "Lady Madonna": "friday night arrives without a suitcase", "Jazz'n'blues": "as i check my balance i kiss the screen", From f52079071376bc230a056c8e9edd9bf94205837c Mon Sep 17 00:00:00 2001 From: valrus Date: Sat, 11 Jan 2025 20:03:28 -0800 Subject: [PATCH 11/48] s/macthin/matching/ --- test/plugins/test_importadded.py | 2 +- test/test_importer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/plugins/test_importadded.py b/test/plugins/test_importadded.py index d48ec6c46..453824dfb 100644 --- a/test/plugins/test_importadded.py +++ b/test/plugins/test_importadded.py @@ -57,7 +57,7 @@ class ImportAddedTest(PluginMixin, ImportTestCase): os.path.getmtime(mfile.path) for mfile in self.import_media ) self.matcher = AutotagStub().install() - self.matcher.macthin = AutotagStub.GOOD + self.matcher.matching = AutotagStub.GOOD self.importer = self.setup_importer() self.importer.add_choice(importer.action.APPLY) diff --git a/test/test_importer.py b/test/test_importer.py index ad6b837f5..b282277e2 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -440,7 +440,7 @@ class ImportTest(ImportTestCase): self.prepare_album_for_import(1) self.setup_importer() self.matcher = AutotagStub().install() - self.matcher.macthin = AutotagStub.GOOD + self.matcher.matching = AutotagStub.GOOD def tearDown(self): super().tearDown() From d298738612323341e2462801b6d67acd28f02de3 Mon Sep 17 00:00:00 2001 From: valrus Date: Sat, 11 Jan 2025 20:03:38 -0800 Subject: [PATCH 12/48] add missing space in comment --- beets/test/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/test/helper.py b/beets/test/helper.py index 4effa47f8..c0d785d6e 100644 --- a/beets/test/helper.py +++ b/beets/test/helper.py @@ -675,7 +675,7 @@ class ImportSessionFixture(ImportSession): >>> importer.run() This imports ``/path/to/import`` into `lib`. It skips the first - album and imports thesecond one with metadata from the tags. For the + album and imports the second one with metadata from the tags. For the remaining albums, the metadata from the autotagger will be applied. 
""" From 99d2da66dc8196c60a85786c5d0f965b4f0a4c45 Mon Sep 17 00:00:00 2001 From: valrus Date: Sat, 11 Jan 2025 20:41:19 -0800 Subject: [PATCH 13/48] use actual value of matcher, not typo'd one --- test/plugins/test_importadded.py | 2 +- test/test_importer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/plugins/test_importadded.py b/test/plugins/test_importadded.py index 453824dfb..608afb399 100644 --- a/test/plugins/test_importadded.py +++ b/test/plugins/test_importadded.py @@ -57,7 +57,7 @@ class ImportAddedTest(PluginMixin, ImportTestCase): os.path.getmtime(mfile.path) for mfile in self.import_media ) self.matcher = AutotagStub().install() - self.matcher.matching = AutotagStub.GOOD + self.matcher.matching = AutotagStub.IDENT self.importer = self.setup_importer() self.importer.add_choice(importer.action.APPLY) diff --git a/test/test_importer.py b/test/test_importer.py index b282277e2..a28b646cf 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -440,7 +440,7 @@ class ImportTest(ImportTestCase): self.prepare_album_for_import(1) self.setup_importer() self.matcher = AutotagStub().install() - self.matcher.matching = AutotagStub.GOOD + self.matcher.matching = AutotagStub.IDENT def tearDown(self): super().tearDown() From 23f4f8261c157a00abd2a401fc8b6c483c8582bb Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Tue, 4 Feb 2025 16:38:24 +0100 Subject: [PATCH 14/48] Added some more typehints --- beets/importer.py | 82 ++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index a81c97df0..8f64e25df 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -185,7 +185,7 @@ class ImportSession: """ logger: logging.Logger - paths: Union[list[bytes], None] + paths: Union[list[bytes], None] = None lib: library.Library _is_resuming: Dict[bytes, bool] @@ -223,8 +223,6 @@ class ImportSession: # Normalize the paths. if paths is not None: self.paths = list(map(normpath, paths)) - else: - self.paths = None def _setup_logging(self, loghandler: Optional[logging.Handler]): logger = logging.getLogger(__name__) @@ -286,13 +284,13 @@ class ImportSession: self.want_resume = config["resume"].as_choice([True, False, "ask"]) - def tag_log(self, status, paths): + def tag_log(self, status, paths: Sequence[PathLike]): """Log a message about a given album to the importer log. The status should reflect the reason the album couldn't be tagged. """ self.logger.info("{0} {1}", status, displayable_path(paths)) - def log_choice(self, task, duplicate=False): + def log_choice(self, task: ImportTask, duplicate=False): """Logs the task's current choice if it should be logged. If ``duplicate``, then this is a secondary choice after a duplicate was detected and a decision was made. 
@@ -313,22 +311,22 @@ class ImportSession: elif task.choice_flag is action.SKIP: self.tag_log("skip", paths) - def should_resume(self, path): + def should_resume(self, path: PathLike): raise NotImplementedError( "Inheriting class must implement `should_resume`" ) - def choose_match(self, task): + def choose_match(self, task: ImportTask): raise NotImplementedError( "Inheriting class must implement `choose_match`" ) - def resolve_duplicate(self, task, found_duplicates): + def resolve_duplicate(self, task: ImportTask, found_duplicates): raise NotImplementedError( "Inheriting class must implement `resolve_duplicate`" ) - def choose_item(self, task): + def choose_item(self, task: ImportTask): raise NotImplementedError( "Inheriting class must implement `choose_item`" ) @@ -522,12 +520,16 @@ class ImportTask(BaseImportTask): system. """ + choice_flag: Optional[action] = None + match: Union[autotag.AlbumMatch, autotag.TrackMatch, None] = None + + # Keep track of the current task item + cur_album: Optional[str] = None + cur_artist: Optional[str] = None + candidates: Sequence[autotag.AlbumMatch | autotag.TrackMatch] = [] + def __init__(self, toppath, paths, items): super().__init__(toppath, paths, items) - self.choice_flag = None - self.cur_album = None - self.cur_artist = None - self.candidates = [] self.rec = None self.should_remove_duplicates = False self.should_merge_duplicates = False @@ -622,13 +624,13 @@ class ImportTask(BaseImportTask): autotag.apply_metadata(self.match.info, self.match.mapping) - def duplicate_items(self, lib): + def duplicate_items(self, lib: library.Library): duplicate_items = [] for album in self.find_duplicates(lib): duplicate_items += album.items() return duplicate_items - def remove_duplicates(self, lib): + def remove_duplicates(self, lib: library.Library): duplicate_items = self.duplicate_items(lib) log.debug("removing {0} old duplicated items", len(duplicate_items)) for item in duplicate_items: @@ -640,7 +642,7 @@ class ImportTask(BaseImportTask): util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) - def set_fields(self, lib): + def set_fields(self, lib: library.Library): """Sets the fields given at CLI or configuration to the specified values, for both the album and all its items. """ @@ -661,7 +663,7 @@ class ImportTask(BaseImportTask): item.store() self.album.store() - def finalize(self, session): + def finalize(self, session: ImportSession): """Save progress, clean up files, and emit plugin event.""" # Update progress. if session.want_resume: @@ -706,7 +708,7 @@ class ImportTask(BaseImportTask): def _emit_imported(self, lib: library.Library): plugins.send("album_imported", lib=lib, album=self.album) - def handle_created(self, session): + def handle_created(self, session: ImportSession): """Send the `import_task_created` event for this task. Return a list of tasks that should continue through the pipeline. By default, this is a list containing only the task itself, but plugins can replace the task @@ -733,7 +735,7 @@ class ImportTask(BaseImportTask): self.candidates = prop.candidates self.rec = prop.recommendation - def find_duplicates(self, lib): + def find_duplicates(self, lib: library.Library): """Return a list of albums from `lib` with the same artist and album name as the task. 
""" @@ -855,7 +857,7 @@ class ImportTask(BaseImportTask): plugins.send("import_task_files", session=session, task=self) - def add(self, lib): + def add(self, lib: library.Library): """Add the items as an album to the library and remove replaced items.""" self.align_album_level_fields() with lib.transaction(): @@ -1101,7 +1103,7 @@ class SingletonImportTask(ImportTask): def infer_album_fields(self): raise NotImplementedError - def choose_match(self, session): + def choose_match(self, session: ImportSession): """Ask the session which match should apply and apply it.""" choice = session.choose_item(self) self.set_choice(choice) @@ -1285,7 +1287,7 @@ class ImportTaskFactory: indicated by a path. """ - def __init__(self, toppath, session): + def __init__(self, toppath: PathLike, session: ImportSession): """Create a new task factory. `toppath` is the user-specified path to search for music to @@ -1338,7 +1340,7 @@ class ImportTaskFactory: else: yield self.sentinel() - def _create(self, task): + def _create(self, task: Optional[ImportTask]): """Handle a new task to be emitted by the factory. Emit the `import_task_created` event and increment the @@ -1371,7 +1373,7 @@ class ImportTaskFactory: for dirs, paths in albums_in_dir(self.toppath): yield dirs, paths - def singleton(self, path): + def singleton(self, path: PathLike): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): log.debug( @@ -1386,7 +1388,7 @@ class ImportTaskFactory: else: return None - def album(self, paths, dirs=None): + def album(self, paths: Iterable[PathLike], dirs=None): """Return a `ImportTask` with all media files from paths. `dirs` is a list of parent directories used to record already @@ -1413,7 +1415,7 @@ class ImportTaskFactory: else: return None - def sentinel(self, paths=None): + def sentinel(self, paths: Optional[Iterable[PathLike]] = None): """Return a `SentinelImportTask` indicating the end of a top-level directory import. """ @@ -1448,7 +1450,7 @@ class ImportTaskFactory: log.debug("Archive extracted to: {0}", self.toppath) return archive_task - def read_item(self, path): + def read_item(self, path: PathLike): """Return an `Item` read from the path. If an item cannot be read, return `None` instead and log an @@ -1491,12 +1493,16 @@ def _extend_pipeline(tasks, *stages): # Full-album pipeline stages. -def read_tasks(session): +def read_tasks(session: ImportSession): """A generator yielding all the albums (as ImportTask objects) found in the user-specified list of paths. In the case of a singleton import, yields single-item tasks instead. """ skipped = 0 + if session.paths is None: + log.warning("No path specified in session.") + return + for toppath in session.paths: # Check whether we need to resume the import. session.ask_resume(toppath) @@ -1514,7 +1520,7 @@ def read_tasks(session): log.info("Skipped {0} paths.", skipped) -def query_tasks(session): +def query_tasks(session: ImportSession): """A generator that works as a drop-in-replacement for read_tasks. Instead of finding files from the filesystem, a query is used to match items from the library. @@ -1641,7 +1647,7 @@ def user_query(session: ImportSession, task: ImportTask): return task -def resolve_duplicates(session, task): +def resolve_duplicates(session: ImportSession, task: ImportTask): """Check if a task conflicts with items or albums already imported and ask the session to resolve this. 
""" @@ -1684,7 +1690,7 @@ def resolve_duplicates(session, task): @pipeline.mutator_stage -def import_asis(session, task): +def import_asis(session: ImportSession, task: ImportTask): """Select the `action.ASIS` choice for all tasks. This stage replaces the initial_lookup and user_query stages @@ -1698,7 +1704,7 @@ def import_asis(session, task): apply_choice(session, task) -def apply_choice(session, task): +def apply_choice(session: ImportSession, task: ImportTask): """Apply the task's choice to the Album or Item it contains and add it to the library. """ @@ -1722,7 +1728,11 @@ def apply_choice(session, task): @pipeline.mutator_stage -def plugin_stage(session, func, task): +def plugin_stage( + session: ImportSession, + func: Callable[[ImportSession, ImportTask]], + task: ImportTask, +): """A coroutine (pipeline stage) that calls the given function with each non-skipped import task. These stages occur between applying metadata changes and moving/copying/writing files. @@ -1739,7 +1749,7 @@ def plugin_stage(session, func, task): @pipeline.stage -def manipulate_files(session, task): +def manipulate_files(session: ImportSession, task: ImportTask): """A coroutine (pipeline stage) that performs necessary file manipulations *after* items have been added to the library and finalizes each task. @@ -1784,7 +1794,7 @@ def log_files(session: ImportSession, task: ImportTask): log.info(" {0}", displayable_path(item["path"])) -def group_albums(session): +def group_albums(session: ImportSession): """A pipeline stage that groups the items of each task into albums using their metadata. @@ -1823,7 +1833,7 @@ def is_subdir_of_any_in_list(path, dirs): return any(d in ancestors for d in dirs) -def albums_in_dir(path): +def albums_in_dir(path: PathLike): """Recursively searches the given directory and returns an iterable of (paths, items) where paths is a list of directories and items is a list of Items that is probably an album. Specifically, any folder From 12b21c48e9fc02979167bf14e414d94509319b3f Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Fri, 7 Feb 2025 16:55:26 +0100 Subject: [PATCH 15/48] Changelog addition --- docs/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 62cd0c4cc..33a4710e6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -83,6 +83,8 @@ Other changes: wrong (outdated) commit. Now the tag is created in the same workflow step right after committing the version update. :bug:`5539` +* Added some typehints: ImportSession and Pipeline have typehints now. Should + improve useability for new developers. 2.2.0 (December 02, 2024) ------------------------- From 12fa3432a9a2ba447e211365166e875963033b03 Mon Sep 17 00:00:00 2001 From: seth-milojevic <52462300+seth-milojevic@users.noreply.github.com> Date: Sat, 8 Feb 2025 00:20:10 -0500 Subject: [PATCH 16/48] Close file descriptor generated from tempfile.mkstemp() Without explicitly closing this file descriptor, the temp file would be kept open until the program exited and could not be deleted by the fetchart plugin. 
---
 beets/util/__init__.py | 5 ++++-
 docs/changelog.rst     | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 32a63b216..ac4e3bc3c 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -1130,7 +1130,10 @@ def get_temp_filename(
     tempdir = get_module_tempdir(module)
     tempdir.mkdir(parents=True, exist_ok=True)
 
-    _, filename = tempfile.mkstemp(dir=tempdir, prefix=prefix, suffix=suffix)
+    descriptor, filename = tempfile.mkstemp(
+        dir=tempdir, prefix=prefix, suffix=suffix
+    )
+    os.close(descriptor)
 
     return bytestring_path(filename)
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 62cd0c4cc..b39e1b291 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -22,6 +22,9 @@ New features:
 
 Bug fixes:
 
+* Fix fetchart bug where a tempfile could not be deleted due to never being
+  properly closed.
+  :bug:`5521`
 * :doc:`plugins/lyrics`: LRCLib will fallback to plain lyrics if synced
   lyrics are not found and `synced` flag is set to `yes`.
 * Synchronise files included in the source distribution with what we used to

From 5685cf43cf14a03e7be2e8d3c16866c8ea772daa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?=
Date: Sat, 8 Feb 2025 08:52:35 +0000
Subject: [PATCH 17/48] Align musica lyrics source expected lyrics with what's
 returned

---
 test/plugins/lyrics_pages.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/test/plugins/lyrics_pages.py b/test/plugins/lyrics_pages.py
index 84c9e2441..2d681e111 100644
--- a/test/plugins/lyrics_pages.py
+++ b/test/plugins/lyrics_pages.py
@@ -456,16 +456,6 @@ lyrics_pages = [
     LyricsPage.make(
         "https://www.musica.com/letras.asp?letra=59862",
         """
-        Lady Madonna, children at your feet
-        Wonder how you manage to make ends meet
-        Who finds the money when you pay the rent?
-        Did you think that money was heaven sent?
-
-        Friday night arrives without a suitcase
-        Sunday morning creeping like a nun
-        Monday's child has learned to tie his bootlace
-        See how they run
-
         Lady Madonna, baby at your breast
         Wonders how you manage to feed the rest
 

From 6205e19b74bc529d3b0a5a95bccd142dd5b1c127 Mon Sep 17 00:00:00 2001
From: seth-milojevic <52462300+seth-milojevic@users.noreply.github.com>
Date: Sat, 8 Feb 2025 12:41:37 -0500
Subject: [PATCH 18/48] Update docs/changelog.rst
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Šarūnas Nejus

---
 docs/changelog.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index b39e1b291..fd1149178 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -22,7 +22,7 @@ New features:
 
 Bug fixes:
 
-* Fix fetchart bug where a tempfile could not be deleted due to never being
+* :doc:`plugins/fetchart`: Fix fetchart bug where a tempfile could not be deleted due to never being
   properly closed.
   :bug:`5521`
 * :doc:`plugins/lyrics`: LRCLib will fallback to plain lyrics if synced
   lyrics are not found and `synced` flag is set to `yes`.

From 10c0aa3f6ab442bf8144bec2dec85ca908617749 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sat, 8 Feb 2025 22:03:03 +0100
Subject: [PATCH 19/48] Replaced PathLike with PathBytes and removed
 unnecessary type ignores

---
 beets/importer.py | 118 ++++++++++++++++++++++++----------------------
 1 file changed, 61 insertions(+), 57 deletions(-)

diff --git a/beets/importer.py b/beets/importer.py
index 8f64e25df..202e8f1ef 100644
--- a/beets/importer.py
+++ b/beets/importer.py
@@ -15,14 +15,13 @@ from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum
 from tempfile import mkdtemp
-from typing import Callable, Dict, Iterable, Optional, Sequence, Union, cast
+from typing import Callable, Iterable, Optional, Sequence, cast
 
 import mediafile
 
 from beets import autotag, config, dbcore, library, logging, plugins, util
 from beets.util import (
     MoveOperation,
-    PathLike,
     ancestry,
     displayable_path,
     normpath,
@@ -61,6 +60,10 @@ REIMPORT_FRESH_FIELDS_ITEM = list(REIMPORT_FRESH_FIELDS_ALBUM)
 # Global logger.
 log = logging.getLogger("beets")
 
+# Here for now to allow for an easy replace later on
+# once we can move to a PathLike
+PathBytes = bytes
+
 
 class ImportAbortError(Exception):
     """Raised when the user aborts the tagging operation."""
@@ -94,12 +97,12 @@ class ImportState:
     ```
     """
 
-    tagprogress: dict
-    taghistory: set
-    path: PathLike
+    tagprogress: dict[PathBytes, list[PathBytes]]
+    taghistory: set[tuple[PathBytes, ...]]
+    path: PathBytes
 
-    def __init__(self, readonly=False, path: Union[PathLike, None] = None):
-        self.path = path or cast(PathLike, config["statefile"].as_filename())
+    def __init__(self, readonly=False, path: Optional[PathBytes] = None):
+        self.path = path or cast(PathBytes, config["statefile"].as_filename())
         self.tagprogress = {}
         self.taghistory = set()
         self._open()
@@ -141,7 +144,7 @@
 
     # -------------------------------- Tagprogress ------------------------------- #
 
-    def progress_add(self, toppath: PathLike, *paths: PathLike):
+    def progress_add(self, toppath: PathBytes, *paths: PathBytes):
         """Record that the files under all of the `paths` have been imported
         under `toppath`.
         """
@@ -153,19 +156,19 @@
             else:
                 insort(imported, path)
 
-    def progress_has_element(self, toppath: PathLike, path: PathLike) -> bool:
+    def progress_has_element(self, toppath: PathBytes, path: PathBytes) -> bool:
         """Return whether `path` has been imported in `toppath`."""
         imported = self.tagprogress.get(toppath, [])
         i = bisect_left(imported, path)
         return i != len(imported) and imported[i] == path
 
-    def progress_has(self, toppath: PathLike) -> bool:
+    def progress_has(self, toppath: PathBytes) -> bool:
         """Return `True` if there exist paths that have already been imported
         under `toppath`.
""" return toppath in self.tagprogress - def progress_reset(self, toppath: PathLike): + def progress_reset(self, toppath: PathBytes): """Reset the progress for `toppath`.""" with self as state: if toppath in state.tagprogress: @@ -173,7 +176,7 @@ class ImportState: # -------------------------------- Taghistory -------------------------------- # - def history_add(self, paths: list[PathLike]): + def history_add(self, paths: list[PathBytes]): """Add the paths to the history.""" with self as state: state.taghistory.add(tuple(paths)) @@ -185,18 +188,18 @@ class ImportSession: """ logger: logging.Logger - paths: Union[list[bytes], None] = None + paths: Optional[list[bytes]] = None lib: library.Library - _is_resuming: Dict[bytes, bool] - _merged_items: set - _merged_dirs: set + _is_resuming: dict[bytes, bool] + _merged_items: set[PathBytes] + _merged_dirs: set[PathBytes] def __init__( self, lib: library.Library, loghandler: Optional[logging.Handler], - paths: Optional[Sequence[PathLike]], + paths: Optional[Sequence[PathBytes]], query: Optional[dbcore.Query], ): """Create a session. @@ -284,7 +287,7 @@ class ImportSession: self.want_resume = config["resume"].as_choice([True, False, "ask"]) - def tag_log(self, status, paths: Sequence[PathLike]): + def tag_log(self, status, paths: Sequence[PathBytes]): """Log a message about a given album to the importer log. The status should reflect the reason the album couldn't be tagged. """ @@ -311,7 +314,7 @@ class ImportSession: elif task.choice_flag is action.SKIP: self.tag_log("skip", paths) - def should_resume(self, path: PathLike): + def should_resume(self, path: PathBytes): raise NotImplementedError( "Inheriting class must implement `should_resume`" ) @@ -383,7 +386,7 @@ class ImportSession: # Incremental and resumed imports - def already_imported(self, toppath: PathLike, paths: Sequence[PathLike]): + def already_imported(self, toppath: PathBytes, paths: Sequence[PathBytes]): """Returns true if the files belonging to this task have already been imported in a previous session. """ @@ -404,7 +407,7 @@ class ImportSession: self._history_dirs = ImportState().taghistory return self._history_dirs - def already_merged(self, paths: Sequence[PathLike]): + def already_merged(self, paths: Sequence[PathBytes]): """Returns true if all the paths being imported were part of a merge during previous tasks. """ @@ -413,7 +416,7 @@ class ImportSession: return False return True - def mark_merged(self, paths: Sequence[PathLike]): + def mark_merged(self, paths: Sequence[PathBytes]): """Mark paths and directories as merged for future reimport tasks.""" self._merged_items.update(paths) dirs = { @@ -422,14 +425,14 @@ class ImportSession: } self._merged_dirs.update(dirs) - def is_resuming(self, toppath: PathLike): + def is_resuming(self, toppath: PathBytes): """Return `True` if user wants to resume import of this path. You have to call `ask_resume` first to determine the return value. """ return self._is_resuming.get(normpath(toppath), False) - def ask_resume(self, toppath: PathLike): + def ask_resume(self, toppath: PathBytes): """If import of `toppath` was aborted in an earlier session, ask user if they want to resume the import. @@ -457,14 +460,14 @@ class BaseImportTask: Tasks flow through the importer pipeline. 
Each stage can update them.""" - toppath: Optional[PathLike] - paths: list[PathLike] + toppath: Optional[PathBytes] + paths: list[PathBytes] items: list[library.Item] def __init__( self, - toppath: Optional[PathLike], - paths: Optional[Iterable[PathLike]], + toppath: Optional[PathBytes], + paths: Optional[Iterable[PathBytes]], items: Optional[Iterable[library.Item]], ): """Create a task. The primary fields that define a task are: @@ -521,7 +524,7 @@ class ImportTask(BaseImportTask): """ choice_flag: Optional[action] = None - match: Union[autotag.AlbumMatch, autotag.TrackMatch, None] = None + match: autotag.AlbumMatch | autotag.TrackMatch | None = None # Keep track of the current task item cur_album: Optional[str] = None @@ -537,7 +540,7 @@ class ImportTask(BaseImportTask): self.search_ids = [] # user-supplied candidate IDs. def set_choice( - self, choice: Union[action, autotag.AlbumMatch, autotag.TrackMatch] + self, choice: action | autotag.AlbumMatch | autotag.TrackMatch ): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. @@ -547,6 +550,7 @@ class ImportTask(BaseImportTask): """ # Not part of the task structure: assert choice != action.APPLY # Only used internally. + if choice in ( action.SKIP, action.ASIS, @@ -554,11 +558,11 @@ class ImportTask(BaseImportTask): action.ALBUMS, action.RETAG, ): - self.choice_flag = choice + self.choice_flag = cast(action, choice) self.match = None else: self.choice_flag = action.APPLY # Implicit choice. - self.match = choice + self.match = cast(autotag.AlbumMatch | autotag.TrackMatch, choice) def save_progress(self): """Updates the progress state to indicate that this album has @@ -827,7 +831,7 @@ class ImportTask(BaseImportTask): items = self.imported_items() # Save the original paths of all items for deletion and pruning # in the next step (finalization). - self.old_paths: list[PathLike] = [item.path for item in items] + self.old_paths: list[PathBytes] = [item.path for item in items] for item in items: if operation is not None: # In copy and link modes, treat re-imports specially: @@ -882,7 +886,7 @@ class ImportTask(BaseImportTask): and `replaced_albums` dictionaries. """ self.replaced_items = defaultdict(list) - self.replaced_albums: dict[PathLike, library.Album] = defaultdict() + self.replaced_albums: dict[PathBytes, library.Album] = defaultdict() replaced_album_ids = set() for item in self.imported_items(): dup_items = list( @@ -1033,7 +1037,7 @@ class ImportTask(BaseImportTask): class SingletonImportTask(ImportTask): """ImportTask for a single track that is not associated to an album.""" - def __init__(self, toppath: Optional[PathLike], item: library.Item): + def __init__(self, toppath: Optional[PathBytes], item: library.Item): super().__init__(toppath, [item.path], [item]) self.item = item self.is_album = False @@ -1287,7 +1291,7 @@ class ImportTaskFactory: indicated by a path. """ - def __init__(self, toppath: PathLike, session: ImportSession): + def __init__(self, toppath: PathBytes, session: ImportSession): """Create a new task factory. `toppath` is the user-specified path to search for music to @@ -1314,6 +1318,7 @@ class ImportTaskFactory: extracted data. """ # Check whether this is an archive. + archive_task: ArchiveImportTask | None = None if self.is_archive: archive_task = self.unarchive() if not archive_task: @@ -1335,8 +1340,8 @@ class ImportTaskFactory: # it is finished. 
This is usually just a SentinelImportTask, but # for archive imports, send the archive task instead (to remove # the extracted directory). - if self.is_archive: - yield archive_task # type: ignore + if archive_task: + yield archive_task else: yield self.sentinel() @@ -1373,7 +1378,7 @@ class ImportTaskFactory: for dirs, paths in albums_in_dir(self.toppath): yield dirs, paths - def singleton(self, path: PathLike): + def singleton(self, path: PathBytes): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): log.debug( @@ -1388,7 +1393,7 @@ class ImportTaskFactory: else: return None - def album(self, paths: Iterable[PathLike], dirs=None): + def album(self, paths: Iterable[PathBytes], dirs=None): """Return a `ImportTask` with all media files from paths. `dirs` is a list of parent directories used to record already @@ -1407,15 +1412,16 @@ class ImportTaskFactory: self.skipped += 1 return None - items = map(self.read_item, paths) - items = [item for item in items if item] + items: list[library.Item] = [ + item for item in map(self.read_item, paths) if item + ] if items: return ImportTask(self.toppath, dirs, items) else: return None - def sentinel(self, paths: Optional[Iterable[PathLike]] = None): + def sentinel(self, paths: Optional[Iterable[PathBytes]] = None): """Return a `SentinelImportTask` indicating the end of a top-level directory import. """ @@ -1450,7 +1456,7 @@ class ImportTaskFactory: log.debug("Archive extracted to: {0}", self.toppath) return archive_task - def read_item(self, path: PathLike): + def read_item(self, path: PathBytes): """Return an `Item` read from the path. If an item cannot be read, return `None` instead and log an @@ -1528,9 +1534,9 @@ def query_tasks(session: ImportSession): if session.config["singletons"]: # Search for items. for item in session.lib.items(session.query): - task = SingletonImportTask(None, item) - for task in task.handle_created(session): - yield task + singleton_task = SingletonImportTask(None, item) + for task in singleton_task.handle_created(session): + yield singleton_task else: # Search for albums. 
@@ -1607,7 +1613,7 @@ def user_query(session: ImportSession, task: ImportTask):
         return _extend_pipeline(
             emitter(task),
             lookup_candidates(session),
-            user_query(session),  # type: ignore # Recursion in decorators
+            user_query(session),
         )
 
     # As albums: group items by albums and create task for each album
@@ -1616,7 +1622,7 @@
             [task],
             group_albums(session),
             lookup_candidates(session),
-            user_query(session),  # type: ignore # Recursion in decorators
+            user_query(session),
         )
 
     resolve_duplicates(session, task)
@@ -1638,9 +1644,7 @@
         )
 
         return _extend_pipeline(
-            [merged_task],
-            lookup_candidates(session),
-            user_query(session),  # type: ignore # Recursion in decorators
+            [merged_task], lookup_candidates(session), user_query(session)
         )
 
     apply_choice(session, task)

From bbe4fb454b21a32869ffd807d1a6ea8e31ba8a48 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sat, 8 Feb 2025 22:12:55 +0100
Subject: [PATCH 20/48] Renamed ignore to _ignore to prevent mypy error

---
 beets/util/__init__.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index e13b67604..411202075 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -210,7 +210,9 @@ def sorted_walk(
     """
     # Make sure the paths aren't Unicode strings.
     bytes_path = bytestring_path(path)
-    ignore = [bytestring_path(i) for i in ignore]
+    _ignore = [  # rename prevents mypy variable shadowing issue
+        bytestring_path(i) for i in ignore
+    ]
 
     # Get all the directories and files at this level.
     try:
@@ -230,7 +232,7 @@ def sorted_walk(
 
         # Skip ignored filenames.
         skip = False
-        for pat in ignore:
+        for pat in _ignore:
             if fnmatch.fnmatch(base, pat):
                 if logger:
                     logger.debug(
@@ -257,7 +259,7 @@ def sorted_walk(
     # Recurse into directories.
     for base in dirs:
         cur = os.path.join(bytes_path, base)
-        yield from sorted_walk(cur, ignore, ignore_hidden, logger)
+        yield from sorted_walk(cur, _ignore, ignore_hidden, logger)
 
 
 def path_as_posix(path: bytes) -> bytes:

From 44074d746449eb7dc157eeb39e53722f67476a39 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sat, 8 Feb 2025 22:19:37 +0100
Subject: [PATCH 21/48] Re-added copyright header and Union.
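
For context, a minimal sketch of why `Union` (rather than the PEP 604
`X | Y` syntax) is still needed at runtime on Python 3.9. `A` and `B`
here are stand-in classes, not the real beets types:

    from typing import Union, cast

    class A: ...
    class B: ...

    obj = A()

    # Fine on Python 3.9: typing.Union builds the union object itself.
    m = cast(Union[A, B], obj)

    # TypeError on Python 3.9: cast() evaluates its first argument, and
    # `A | B` between plain classes only works on Python 3.10+. A
    # `from __future__ import annotations` import defers annotations,
    # not ordinary expressions like this one.
    # m = cast(A | B, obj)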
--- beets/importer.py | 96 +++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 58 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 202e8f1ef..22f6273df 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1,3 +1,17 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + """Provides the basic, interface-agnostic workflow for importing and autotagging music files. """ @@ -15,7 +29,7 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum from tempfile import mkdtemp -from typing import Callable, Iterable, Optional, Sequence, cast +from typing import Callable, Iterable, Optional, Sequence, Union, cast import mediafile @@ -255,9 +269,7 @@ class ImportSession: iconfig["incremental"] = False if iconfig["reflink"]: - iconfig["reflink"] = iconfig["reflink"].as_choice( - ["auto", True, False] - ) + iconfig["reflink"] = iconfig["reflink"].as_choice(["auto", True, False]) # Copy, move, reflink, link, and hardlink are mutually exclusive. if iconfig["move"]: @@ -315,24 +327,16 @@ class ImportSession: self.tag_log("skip", paths) def should_resume(self, path: PathBytes): - raise NotImplementedError( - "Inheriting class must implement `should_resume`" - ) + raise NotImplementedError("Inheriting class must implement `should_resume`") def choose_match(self, task: ImportTask): - raise NotImplementedError( - "Inheriting class must implement `choose_match`" - ) + raise NotImplementedError("Inheriting class must implement `choose_match`") def resolve_duplicate(self, task: ImportTask, found_duplicates): - raise NotImplementedError( - "Inheriting class must implement `resolve_duplicate`" - ) + raise NotImplementedError("Inheriting class must implement `resolve_duplicate`") def choose_item(self, task: ImportTask): - raise NotImplementedError( - "Inheriting class must implement `choose_item`" - ) + raise NotImplementedError("Inheriting class must implement `choose_item`") def run(self): """Run the import task.""" @@ -539,9 +543,7 @@ class ImportTask(BaseImportTask): self.is_album = True self.search_ids = [] # user-supplied candidate IDs. - def set_choice( - self, choice: action | autotag.AlbumMatch | autotag.TrackMatch - ): + def set_choice(self, choice: action | autotag.AlbumMatch | autotag.TrackMatch): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. @@ -562,7 +564,8 @@ class ImportTask(BaseImportTask): self.match = None else: self.choice_flag = action.APPLY # Implicit choice. - self.match = cast(autotag.AlbumMatch | autotag.TrackMatch, choice) + # Union is needed here for python 3.9 compatibility! 
+ self.match = cast(Union[autotag.AlbumMatch, autotag.TrackMatch], choice) def save_progress(self): """Updates the progress state to indicate that this album has @@ -640,9 +643,7 @@ class ImportTask(BaseImportTask): for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug( - "deleting duplicate {0}", util.displayable_path(item.path) - ) + log.debug("deleting duplicate {0}", util.displayable_path(item.path)) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -674,7 +675,8 @@ class ImportTask(BaseImportTask): self.save_progress() if session.config["incremental"] and not ( # Should we skip recording to incremental list? - self.skip and session.config["incremental_skip_later"] + self.skip + and session.config["incremental_skip_later"] ): self.save_history() @@ -731,9 +733,7 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, prop = autotag.tag_album( - self.items, search_ids=self.search_ids - ) + artist, album, prop = autotag.tag_album(self.items, search_ids=self.search_ids) self.cur_artist = artist self.cur_album = album self.candidates = prop.candidates @@ -753,9 +753,7 @@ class ImportTask(BaseImportTask): # Construct a query to find duplicates with this metadata. We # use a temporary Album object to generate any computed fields. tmp_album = library.Album(lib, **info) - keys = cast( - list[str], config["import"]["duplicate_keys"]["album"].as_str_seq() - ) + keys = cast(list[str], config["import"]["duplicate_keys"]["album"].as_str_seq()) dup_query = tmp_album.duplicates_query(keys) # Don't count albums with the same files as duplicates. @@ -786,8 +784,7 @@ class ImportTask(BaseImportTask): [i.albumartist or i.artist for i in self.items] ) if freq == len(self.items) or ( - freq > 1 - and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + freq > 1 and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH ): # Single-artist album. changes["albumartist"] = plur_albumartist @@ -889,15 +886,10 @@ class ImportTask(BaseImportTask): self.replaced_albums: dict[PathBytes, library.Album] = defaultdict() replaced_album_ids = set() for item in self.imported_items(): - dup_items = list( - lib.items(dbcore.query.BytesQuery("path", item.path)) - ) + dup_items = list(lib.items(dbcore.query.BytesQuery("path", item.path))) self.replaced_items[item] = dup_items for dup_item in dup_items: - if ( - not dup_item.album_id - or dup_item.album_id in replaced_album_ids - ): + if not dup_item.album_id or dup_item.album_id in replaced_album_ids: continue replaced_album = dup_item._cached_album if replaced_album: @@ -950,8 +942,7 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - "Reimported album {}. Preserving attribute ['added']. " - "Path: {}", + "Reimported album {}. Preserving attribute ['added']. " "Path: {}", self.album.id, displayable_path(self.album.path), ) @@ -1083,9 +1074,7 @@ class SingletonImportTask(ImportTask): # Query for existing items using the same metadata. We use a # temporary `Item` object to generate any computed fields. 
tmp_item = library.Item(lib, **info) - keys = cast( - list[str], config["import"]["duplicate_keys"]["item"].as_str_seq() - ) + keys = cast(list[str], config["import"]["duplicate_keys"]["item"].as_str_seq()) dup_query = tmp_item.duplicates_query(keys) found_items = [] @@ -1259,9 +1248,7 @@ class ArchiveImportTask(SentinelImportTask): break if handler_class is None: - raise ValueError( - "No handler found for archive: {0}".format(self.toppath) - ) + raise ValueError("No handler found for archive: {0}".format(self.toppath)) extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") @@ -1381,9 +1368,7 @@ class ImportTaskFactory: def singleton(self, path: PathBytes): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(path) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(path)) self.skipped += 1 return None @@ -1406,9 +1391,7 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(dirs) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) self.skipped += 1 return None @@ -1438,8 +1421,7 @@ class ImportTaskFactory: if not (self.session.config["move"] or self.session.config["copy"]): log.warning( - "Archive importing requires either " - "'copy' or 'move' to be enabled." + "Archive importing requires either " "'copy' or 'move' to be enabled." ) return @@ -1658,9 +1640,7 @@ def resolve_duplicates(session: ImportSession, task: ImportTask): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug( - "found duplicates: {}".format([o.id for o in found_duplicates]) - ) + log.debug("found duplicates: {}".format([o.id for o in found_duplicates])) # Get the default action to follow from config. duplicate_action = config["import"]["duplicate_action"].as_choice( From 1a24e3f1d0977d69843a3db365f59773dff30940 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 8 Feb 2025 22:20:40 +0100 Subject: [PATCH 22/48] Formatting --- beets/importer.py | 81 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 22f6273df..0fd83b561 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -269,7 +269,9 @@ class ImportSession: iconfig["incremental"] = False if iconfig["reflink"]: - iconfig["reflink"] = iconfig["reflink"].as_choice(["auto", True, False]) + iconfig["reflink"] = iconfig["reflink"].as_choice( + ["auto", True, False] + ) # Copy, move, reflink, link, and hardlink are mutually exclusive. 
if iconfig["move"]: @@ -327,16 +329,24 @@ class ImportSession: self.tag_log("skip", paths) def should_resume(self, path: PathBytes): - raise NotImplementedError("Inheriting class must implement `should_resume`") + raise NotImplementedError( + "Inheriting class must implement `should_resume`" + ) def choose_match(self, task: ImportTask): - raise NotImplementedError("Inheriting class must implement `choose_match`") + raise NotImplementedError( + "Inheriting class must implement `choose_match`" + ) def resolve_duplicate(self, task: ImportTask, found_duplicates): - raise NotImplementedError("Inheriting class must implement `resolve_duplicate`") + raise NotImplementedError( + "Inheriting class must implement `resolve_duplicate`" + ) def choose_item(self, task: ImportTask): - raise NotImplementedError("Inheriting class must implement `choose_item`") + raise NotImplementedError( + "Inheriting class must implement `choose_item`" + ) def run(self): """Run the import task.""" @@ -543,7 +553,9 @@ class ImportTask(BaseImportTask): self.is_album = True self.search_ids = [] # user-supplied candidate IDs. - def set_choice(self, choice: action | autotag.AlbumMatch | autotag.TrackMatch): + def set_choice( + self, choice: action | autotag.AlbumMatch | autotag.TrackMatch + ): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. @@ -565,7 +577,9 @@ class ImportTask(BaseImportTask): else: self.choice_flag = action.APPLY # Implicit choice. # Union is needed here for python 3.9 compatibility! - self.match = cast(Union[autotag.AlbumMatch, autotag.TrackMatch], choice) + self.match = cast( + Union[autotag.AlbumMatch, autotag.TrackMatch], choice + ) def save_progress(self): """Updates the progress state to indicate that this album has @@ -643,7 +657,9 @@ class ImportTask(BaseImportTask): for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug("deleting duplicate {0}", util.displayable_path(item.path)) + log.debug( + "deleting duplicate {0}", util.displayable_path(item.path) + ) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -675,8 +691,7 @@ class ImportTask(BaseImportTask): self.save_progress() if session.config["incremental"] and not ( # Should we skip recording to incremental list? - self.skip - and session.config["incremental_skip_later"] + self.skip and session.config["incremental_skip_later"] ): self.save_history() @@ -733,7 +748,9 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, prop = autotag.tag_album(self.items, search_ids=self.search_ids) + artist, album, prop = autotag.tag_album( + self.items, search_ids=self.search_ids + ) self.cur_artist = artist self.cur_album = album self.candidates = prop.candidates @@ -753,7 +770,9 @@ class ImportTask(BaseImportTask): # Construct a query to find duplicates with this metadata. We # use a temporary Album object to generate any computed fields. tmp_album = library.Album(lib, **info) - keys = cast(list[str], config["import"]["duplicate_keys"]["album"].as_str_seq()) + keys = cast( + list[str], config["import"]["duplicate_keys"]["album"].as_str_seq() + ) dup_query = tmp_album.duplicates_query(keys) # Don't count albums with the same files as duplicates. 
@@ -784,7 +803,8 @@ class ImportTask(BaseImportTask): [i.albumartist or i.artist for i in self.items] ) if freq == len(self.items) or ( - freq > 1 and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + freq > 1 + and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH ): # Single-artist album. changes["albumartist"] = plur_albumartist @@ -886,10 +906,15 @@ class ImportTask(BaseImportTask): self.replaced_albums: dict[PathBytes, library.Album] = defaultdict() replaced_album_ids = set() for item in self.imported_items(): - dup_items = list(lib.items(dbcore.query.BytesQuery("path", item.path))) + dup_items = list( + lib.items(dbcore.query.BytesQuery("path", item.path)) + ) self.replaced_items[item] = dup_items for dup_item in dup_items: - if not dup_item.album_id or dup_item.album_id in replaced_album_ids: + if ( + not dup_item.album_id + or dup_item.album_id in replaced_album_ids + ): continue replaced_album = dup_item._cached_album if replaced_album: @@ -942,7 +967,8 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - "Reimported album {}. Preserving attribute ['added']. " "Path: {}", + "Reimported album {}. Preserving attribute ['added']. " + "Path: {}", self.album.id, displayable_path(self.album.path), ) @@ -1074,7 +1100,9 @@ class SingletonImportTask(ImportTask): # Query for existing items using the same metadata. We use a # temporary `Item` object to generate any computed fields. tmp_item = library.Item(lib, **info) - keys = cast(list[str], config["import"]["duplicate_keys"]["item"].as_str_seq()) + keys = cast( + list[str], config["import"]["duplicate_keys"]["item"].as_str_seq() + ) dup_query = tmp_item.duplicates_query(keys) found_items = [] @@ -1248,7 +1276,9 @@ class ArchiveImportTask(SentinelImportTask): break if handler_class is None: - raise ValueError("No handler found for archive: {0}".format(self.toppath)) + raise ValueError( + "No handler found for archive: {0}".format(self.toppath) + ) extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") @@ -1368,7 +1398,9 @@ class ImportTaskFactory: def singleton(self, path: PathBytes): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): - log.debug("Skipping previously-imported path: {0}", displayable_path(path)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(path) + ) self.skipped += 1 return None @@ -1391,7 +1423,9 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(dirs) + ) self.skipped += 1 return None @@ -1421,7 +1455,8 @@ class ImportTaskFactory: if not (self.session.config["move"] or self.session.config["copy"]): log.warning( - "Archive importing requires either " "'copy' or 'move' to be enabled." + "Archive importing requires either " + "'copy' or 'move' to be enabled." ) return @@ -1640,7 +1675,9 @@ def resolve_duplicates(session: ImportSession, task: ImportTask): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug("found duplicates: {}".format([o.id for o in found_duplicates])) + log.debug( + "found duplicates: {}".format([o.id for o in found_duplicates]) + ) # Get the default action to follow from config. 
            duplicate_action = config["import"]["duplicate_action"].as_choice(

From bbd92d97ab7cbaa3dfd0735560f7b980c28c6d6e Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sun, 9 Feb 2025 11:05:09 +0100
Subject: [PATCH 23/48] Removed Optional type hints in favor of pipe-based
 unions with None

---
 beets/importer.py | 38 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/beets/importer.py b/beets/importer.py
index 0fd83b561..2c9e89f74 100644
--- a/beets/importer.py
+++ b/beets/importer.py
@@ -115,8 +115,10 @@ class ImportState:
     taghistory: set[tuple[PathBytes, ...]]
     path: PathBytes
 
-    def __init__(self, readonly=False, path: Optional[PathBytes] = None):
-        self.path = path or cast(PathBytes, config["statefile"].as_filename())
+    def __init__(self, readonly=False, path: PathBytes | None = None):
+        self.path = path or os.fsencode(
+            cast(str, config["statefile"].as_filename())
+        )
         self.tagprogress = {}
         self.taghistory = set()
         self._open()
@@ -202,7 +204,7 @@ class ImportSession:
     """
 
     logger: logging.Logger
-    paths: Optional[list[bytes]] = None
+    paths: list[bytes] | None = None
     lib: library.Library
 
     _is_resuming: dict[bytes, bool]
@@ -212,9 +214,9 @@ class ImportSession:
     def __init__(
         self,
        lib: library.Library,
-        loghandler: Optional[logging.Handler],
-        paths: Optional[Sequence[PathBytes]],
-        query: Optional[dbcore.Query],
+        loghandler: logging.Handler | None,
+        paths: Sequence[PathBytes] | None,
+        query: dbcore.Query | None,
     ):
         """Create a session.
 
@@ -241,7 +243,7 @@ class ImportSession:
         if paths is not None:
             self.paths = list(map(normpath, paths))
 
-    def _setup_logging(self, loghandler: Optional[logging.Handler]):
+    def _setup_logging(self, loghandler: logging.Handler | None):
         logger = logging.getLogger(__name__)
         logger.propagate = False
         if not loghandler:
@@ -474,15 +476,15 @@ class BaseImportTask:
     Tasks flow through the importer pipeline. Each stage can update
     them."""
 
-    toppath: Optional[PathBytes]
+    toppath: PathBytes | None
     paths: list[PathBytes]
     items: list[library.Item]
 
     def __init__(
         self,
-        toppath: Optional[PathBytes],
-        paths: Optional[Iterable[PathBytes]],
-        items: Optional[Iterable[library.Item]],
+        toppath: PathBytes | None,
+        paths: Iterable[PathBytes] | None,
+        items: Iterable[library.Item] | None,
     ):
         """Create a task. The primary fields that define a task are:
@@ -537,12 +539,12 @@ class ImportTask(BaseImportTask):
     system.
     """
 
-    choice_flag: Optional[action] = None
+    choice_flag: action | None = None
     match: autotag.AlbumMatch | autotag.TrackMatch | None = None
 
     # Keep track of the current task item
-    cur_album: Optional[str] = None
-    cur_artist: Optional[str] = None
+    cur_album: str | None = None
+    cur_artist: str | None = None
     candidates: Sequence[autotag.AlbumMatch | autotag.TrackMatch] = []
 
     def __init__(self, toppath, paths, items):
@@ -834,7 +836,7 @@ class ImportTask(BaseImportTask):
         self,
         operation: Optional[MoveOperation] = None,
         write=False,
-        session: Optional[ImportSession] = None,
+        session: ImportSession | None = None,
     ):
         """Copy, move, link, hardlink or reflink (depending on `operation`)
         the files as well as write metadata.
@@ -1054,7 +1056,7 @@ class ImportTask(BaseImportTask):
 class SingletonImportTask(ImportTask):
     """ImportTask for a single track that is not associated to an album."""
 
-    def __init__(self, toppath: Optional[PathBytes], item: library.Item):
+    def __init__(self, toppath: PathBytes | None, item: library.Item):
         super().__init__(toppath, [item.path], [item])
         self.item = item
         self.is_album = False
@@ -1362,7 +1364,7 @@ class ImportTaskFactory:
         else:
             yield self.sentinel()
 
-    def _create(self, task: Optional[ImportTask]):
+    def _create(self, task: ImportTask | None):
         """Handle a new task to be emitted by the factory.
 
         Emit the `import_task_created` event and increment the
@@ -1438,7 +1440,7 @@ class ImportTaskFactory:
         else:
             return None
 
-    def sentinel(self, paths: Optional[Iterable[PathBytes]] = None):
+    def sentinel(self, paths: Iterable[PathBytes] | None = None):
         """Return a `SentinelImportTask` indicating the end of a
         top-level directory import.
         """

From 716720d2a5d123594a31230e126df82fa00ffca0 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sun, 9 Feb 2025 11:23:01 +0100
Subject: [PATCH 24/48] Removed unnecessary cast even though it now produces
 issues locally.

---
 beets/importer.py | 97 +++++++++++++++--------------------------------
 1 file changed, 30 insertions(+), 67 deletions(-)

diff --git a/beets/importer.py b/beets/importer.py
index 2c9e89f74..a0094f497 100644
--- a/beets/importer.py
+++ b/beets/importer.py
@@ -29,7 +29,7 @@ from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum
 from tempfile import mkdtemp
-from typing import Callable, Iterable, Optional, Sequence, Union, cast
+from typing import Callable, Iterable, Sequence, Union, cast
 
 import mediafile
 
@@ -116,9 +116,7 @@ class ImportState:
     path: PathBytes
 
     def __init__(self, readonly=False, path: PathBytes | None = None):
-        self.path = path or os.fsencode(
-            cast(str, config["statefile"].as_filename())
-        )
+        self.path = path or os.fsencode(config["statefile"].as_filename())
         self.tagprogress = {}
         self.taghistory = set()
         self._open()
@@ -271,9 +269,7 @@ class ImportSession:
             iconfig["incremental"] = False
 
         if iconfig["reflink"]:
-            iconfig["reflink"] = iconfig["reflink"].as_choice(
-                ["auto", True, False]
-            )
+            iconfig["reflink"] = iconfig["reflink"].as_choice(["auto", True, False])
 
         # Copy, move, reflink, link, and hardlink are mutually exclusive.
         if iconfig["move"]:
@@ -331,24 +327,16 @@ class ImportSession:
         self.tag_log("skip", paths)
 
     def should_resume(self, path: PathBytes):
-        raise NotImplementedError(
-            "Inheriting class must implement `should_resume`"
-        )
+        raise NotImplementedError("Inheriting class must implement `should_resume`")
 
     def choose_match(self, task: ImportTask):
-        raise NotImplementedError(
-            "Inheriting class must implement `choose_match`"
-        )
+        raise NotImplementedError("Inheriting class must implement `choose_match`")
 
     def resolve_duplicate(self, task: ImportTask, found_duplicates):
-        raise NotImplementedError(
-            "Inheriting class must implement `resolve_duplicate`"
-        )
+        raise NotImplementedError("Inheriting class must implement `resolve_duplicate`")
 
     def choose_item(self, task: ImportTask):
-        raise NotImplementedError(
-            "Inheriting class must implement `choose_item`"
-        )
+        raise NotImplementedError("Inheriting class must implement `choose_item`")
 
     def run(self):
         """Run the import task."""
@@ -555,9 +543,7 @@ class ImportTask(BaseImportTask):
         self.is_album = True
         self.search_ids = []  # user-supplied candidate IDs.
- def set_choice( - self, choice: action | autotag.AlbumMatch | autotag.TrackMatch - ): + def set_choice(self, choice: action | autotag.AlbumMatch | autotag.TrackMatch): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. @@ -574,14 +560,14 @@ class ImportTask(BaseImportTask): action.ALBUMS, action.RETAG, ): + # Cast needed as mypy can't infer the type self.choice_flag = cast(action, choice) self.match = None else: self.choice_flag = action.APPLY # Implicit choice. # Union is needed here for python 3.9 compatibility! - self.match = cast( - Union[autotag.AlbumMatch, autotag.TrackMatch], choice - ) + # Cast needed as mypy can't infer the type + self.match = cast(Union[autotag.AlbumMatch, autotag.TrackMatch], choice) def save_progress(self): """Updates the progress state to indicate that this album has @@ -659,9 +645,7 @@ class ImportTask(BaseImportTask): for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug( - "deleting duplicate {0}", util.displayable_path(item.path) - ) + log.debug("deleting duplicate {0}", util.displayable_path(item.path)) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -693,7 +677,8 @@ class ImportTask(BaseImportTask): self.save_progress() if session.config["incremental"] and not ( # Should we skip recording to incremental list? - self.skip and session.config["incremental_skip_later"] + self.skip + and session.config["incremental_skip_later"] ): self.save_history() @@ -750,9 +735,7 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, prop = autotag.tag_album( - self.items, search_ids=self.search_ids - ) + artist, album, prop = autotag.tag_album(self.items, search_ids=self.search_ids) self.cur_artist = artist self.cur_album = album self.candidates = prop.candidates @@ -772,9 +755,7 @@ class ImportTask(BaseImportTask): # Construct a query to find duplicates with this metadata. We # use a temporary Album object to generate any computed fields. tmp_album = library.Album(lib, **info) - keys = cast( - list[str], config["import"]["duplicate_keys"]["album"].as_str_seq() - ) + keys: list[str] = config["import"]["duplicate_keys"]["album"].as_str_seq() dup_query = tmp_album.duplicates_query(keys) # Don't count albums with the same files as duplicates. @@ -805,8 +786,7 @@ class ImportTask(BaseImportTask): [i.albumartist or i.artist for i in self.items] ) if freq == len(self.items) or ( - freq > 1 - and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + freq > 1 and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH ): # Single-artist album. 
changes["albumartist"] = plur_albumartist @@ -834,7 +814,7 @@ class ImportTask(BaseImportTask): def manipulate_files( self, - operation: Optional[MoveOperation] = None, + operation: MoveOperation | None = None, write=False, session: ImportSession | None = None, ): @@ -908,15 +888,10 @@ class ImportTask(BaseImportTask): self.replaced_albums: dict[PathBytes, library.Album] = defaultdict() replaced_album_ids = set() for item in self.imported_items(): - dup_items = list( - lib.items(dbcore.query.BytesQuery("path", item.path)) - ) + dup_items = list(lib.items(dbcore.query.BytesQuery("path", item.path))) self.replaced_items[item] = dup_items for dup_item in dup_items: - if ( - not dup_item.album_id - or dup_item.album_id in replaced_album_ids - ): + if not dup_item.album_id or dup_item.album_id in replaced_album_ids: continue replaced_album = dup_item._cached_album if replaced_album: @@ -969,8 +944,7 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - "Reimported album {}. Preserving attribute ['added']. " - "Path: {}", + "Reimported album {}. Preserving attribute ['added']. " "Path: {}", self.album.id, displayable_path(self.album.path), ) @@ -1049,7 +1023,7 @@ class ImportTask(BaseImportTask): util.prune_dirs( os.path.dirname(filename), self.toppath, - clutter=cast(list[str], config["clutter"].as_str_seq()), + clutter=config["clutter"].as_str_seq(), ) @@ -1102,9 +1076,7 @@ class SingletonImportTask(ImportTask): # Query for existing items using the same metadata. We use a # temporary `Item` object to generate any computed fields. tmp_item = library.Item(lib, **info) - keys = cast( - list[str], config["import"]["duplicate_keys"]["item"].as_str_seq() - ) + keys: list[str] = config["import"]["duplicate_keys"]["item"].as_str_seq() dup_query = tmp_item.duplicates_query(keys) found_items = [] @@ -1278,9 +1250,7 @@ class ArchiveImportTask(SentinelImportTask): break if handler_class is None: - raise ValueError( - "No handler found for archive: {0}".format(self.toppath) - ) + raise ValueError("No handler found for archive: {0}".format(self.toppath)) extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") @@ -1400,9 +1370,7 @@ class ImportTaskFactory: def singleton(self, path: PathBytes): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(path) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(path)) self.skipped += 1 return None @@ -1425,9 +1393,7 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(dirs) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) self.skipped += 1 return None @@ -1457,8 +1423,7 @@ class ImportTaskFactory: if not (self.session.config["move"] or self.session.config["copy"]): log.warning( - "Archive importing requires either " - "'copy' or 'move' to be enabled." + "Archive importing requires either " "'copy' or 'move' to be enabled." 
) return @@ -1677,9 +1642,7 @@ def resolve_duplicates(session: ImportSession, task: ImportTask): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug( - "found duplicates: {}".format([o.id for o in found_duplicates]) - ) + log.debug("found duplicates: {}".format([o.id for o in found_duplicates])) # Get the default action to follow from config. duplicate_action = config["import"]["duplicate_action"].as_choice( @@ -1863,8 +1826,8 @@ def albums_in_dir(path: PathBytes): containing any media files is an album. """ collapse_pat = collapse_paths = collapse_items = None - ignore = cast(list[str], config["ignore"].as_str_seq()) - ignore_hidden = cast(bool, config["ignore_hidden"].get(bool)) + ignore: list[str] = config["ignore"].as_str_seq() + ignore_hidden: bool = config["ignore_hidden"].get(bool) for root, dirs, files in sorted_walk( path, ignore=ignore, ignore_hidden=ignore_hidden, logger=log From fdf7afbfe3c0b2258cd1e32e5a9aa4dca0d6a46d Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sun, 9 Feb 2025 11:23:22 +0100 Subject: [PATCH 25/48] Formatting --- beets/importer.py | 81 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index a0094f497..6992adbbb 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -269,7 +269,9 @@ class ImportSession: iconfig["incremental"] = False if iconfig["reflink"]: - iconfig["reflink"] = iconfig["reflink"].as_choice(["auto", True, False]) + iconfig["reflink"] = iconfig["reflink"].as_choice( + ["auto", True, False] + ) # Copy, move, reflink, link, and hardlink are mutually exclusive. if iconfig["move"]: @@ -327,16 +329,24 @@ class ImportSession: self.tag_log("skip", paths) def should_resume(self, path: PathBytes): - raise NotImplementedError("Inheriting class must implement `should_resume`") + raise NotImplementedError( + "Inheriting class must implement `should_resume`" + ) def choose_match(self, task: ImportTask): - raise NotImplementedError("Inheriting class must implement `choose_match`") + raise NotImplementedError( + "Inheriting class must implement `choose_match`" + ) def resolve_duplicate(self, task: ImportTask, found_duplicates): - raise NotImplementedError("Inheriting class must implement `resolve_duplicate`") + raise NotImplementedError( + "Inheriting class must implement `resolve_duplicate`" + ) def choose_item(self, task: ImportTask): - raise NotImplementedError("Inheriting class must implement `choose_item`") + raise NotImplementedError( + "Inheriting class must implement `choose_item`" + ) def run(self): """Run the import task.""" @@ -543,7 +553,9 @@ class ImportTask(BaseImportTask): self.is_album = True self.search_ids = [] # user-supplied candidate IDs. - def set_choice(self, choice: action | autotag.AlbumMatch | autotag.TrackMatch): + def set_choice( + self, choice: action | autotag.AlbumMatch | autotag.TrackMatch + ): """Given an AlbumMatch or TrackMatch object or an action constant, indicates that an action has been selected for this task. @@ -567,7 +579,9 @@ class ImportTask(BaseImportTask): self.choice_flag = action.APPLY # Implicit choice. # Union is needed here for python 3.9 compatibility! 
# Cast needed as mypy can't infer the type - self.match = cast(Union[autotag.AlbumMatch, autotag.TrackMatch], choice) + self.match = cast( + Union[autotag.AlbumMatch, autotag.TrackMatch], choice + ) def save_progress(self): """Updates the progress state to indicate that this album has @@ -645,7 +659,9 @@ class ImportTask(BaseImportTask): for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug("deleting duplicate {0}", util.displayable_path(item.path)) + log.debug( + "deleting duplicate {0}", util.displayable_path(item.path) + ) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) @@ -677,8 +693,7 @@ class ImportTask(BaseImportTask): self.save_progress() if session.config["incremental"] and not ( # Should we skip recording to incremental list? - self.skip - and session.config["incremental_skip_later"] + self.skip and session.config["incremental_skip_later"] ): self.save_history() @@ -735,7 +750,9 @@ class ImportTask(BaseImportTask): candidate IDs are stored in self.search_ids: if present, the initial lookup is restricted to only those IDs. """ - artist, album, prop = autotag.tag_album(self.items, search_ids=self.search_ids) + artist, album, prop = autotag.tag_album( + self.items, search_ids=self.search_ids + ) self.cur_artist = artist self.cur_album = album self.candidates = prop.candidates @@ -755,7 +772,9 @@ class ImportTask(BaseImportTask): # Construct a query to find duplicates with this metadata. We # use a temporary Album object to generate any computed fields. tmp_album = library.Album(lib, **info) - keys: list[str] = config["import"]["duplicate_keys"]["album"].as_str_seq() + keys: list[str] = config["import"]["duplicate_keys"][ + "album" + ].as_str_seq() dup_query = tmp_album.duplicates_query(keys) # Don't count albums with the same files as duplicates. @@ -786,7 +805,8 @@ class ImportTask(BaseImportTask): [i.albumartist or i.artist for i in self.items] ) if freq == len(self.items) or ( - freq > 1 and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH + freq > 1 + and float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH ): # Single-artist album. changes["albumartist"] = plur_albumartist @@ -888,10 +908,15 @@ class ImportTask(BaseImportTask): self.replaced_albums: dict[PathBytes, library.Album] = defaultdict() replaced_album_ids = set() for item in self.imported_items(): - dup_items = list(lib.items(dbcore.query.BytesQuery("path", item.path))) + dup_items = list( + lib.items(dbcore.query.BytesQuery("path", item.path)) + ) self.replaced_items[item] = dup_items for dup_item in dup_items: - if not dup_item.album_id or dup_item.album_id in replaced_album_ids: + if ( + not dup_item.album_id + or dup_item.album_id in replaced_album_ids + ): continue replaced_album = dup_item._cached_album if replaced_album: @@ -944,7 +969,8 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - "Reimported album {}. Preserving attribute ['added']. " "Path: {}", + "Reimported album {}. Preserving attribute ['added']. " + "Path: {}", self.album.id, displayable_path(self.album.path), ) @@ -1076,7 +1102,9 @@ class SingletonImportTask(ImportTask): # Query for existing items using the same metadata. We use a # temporary `Item` object to generate any computed fields. 
tmp_item = library.Item(lib, **info) - keys: list[str] = config["import"]["duplicate_keys"]["item"].as_str_seq() + keys: list[str] = config["import"]["duplicate_keys"][ + "item" + ].as_str_seq() dup_query = tmp_item.duplicates_query(keys) found_items = [] @@ -1250,7 +1278,9 @@ class ArchiveImportTask(SentinelImportTask): break if handler_class is None: - raise ValueError("No handler found for archive: {0}".format(self.toppath)) + raise ValueError( + "No handler found for archive: {0}".format(self.toppath) + ) extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") @@ -1370,7 +1400,9 @@ class ImportTaskFactory: def singleton(self, path: PathBytes): """Return a `SingletonImportTask` for the music file.""" if self.session.already_imported(self.toppath, [path]): - log.debug("Skipping previously-imported path: {0}", displayable_path(path)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(path) + ) self.skipped += 1 return None @@ -1393,7 +1425,9 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(dirs) + ) self.skipped += 1 return None @@ -1423,7 +1457,8 @@ class ImportTaskFactory: if not (self.session.config["move"] or self.session.config["copy"]): log.warning( - "Archive importing requires either " "'copy' or 'move' to be enabled." + "Archive importing requires either " + "'copy' or 'move' to be enabled." ) return @@ -1642,7 +1677,9 @@ def resolve_duplicates(session: ImportSession, task: ImportTask): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug("found duplicates: {}".format([o.id for o in found_duplicates])) + log.debug( + "found duplicates: {}".format([o.id for o in found_duplicates]) + ) # Get the default action to follow from config. duplicate_action = config["import"]["duplicate_action"].as_choice( From c17a774dd6aa484f877d256a1edecd9ce319c442 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sun, 9 Feb 2025 20:23:19 +0100 Subject: [PATCH 26/48] Removed Optional and Union and resolved a minor mypy shadowing issue. --- beets/util/pipeline.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/beets/util/pipeline.py b/beets/util/pipeline.py index d797dea13..3a31175b6 100644 --- a/beets/util/pipeline.py +++ b/beets/util/pipeline.py @@ -30,11 +30,12 @@ up a bottleneck stage by dividing its work among multiple threads. To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. 
""" +from __future__ import annotations import queue import sys from threading import Lock, Thread -from typing import Callable, Generator, Optional, Union +from typing import Callable, Generator from typing_extensions import TypeVar, TypeVarTuple, Unpack @@ -152,15 +153,20 @@ def multiple(messages): return MultiMessage(messages) -A = TypeVarTuple("A") -T = TypeVar("T") +# Arguments of the function (omitting the task) +Args = TypeVarTuple("Args") +# Task as an additional argument to the function +Task = TypeVar("Task") +# Return type of the function (should normally be task but sadly +# we cant enforce this with the current stage functions without +# a refactor) R = TypeVar("R") def stage( func: Callable[ - [Unpack[A], T], - Optional[R], + [Unpack[Args], Task], + R | None, ], ): """Decorate a function to become a simple stage. @@ -176,8 +182,8 @@ def stage( [3, 4, 5] """ - def coro(*args: Unpack[A]) -> Generator[Union[R, T, None], T, R]: - task = None + def coro(*args: Unpack[Args]) -> Generator[R | Task | None, Task, None]: + task: R | Task | None = None while True: task = yield task task = func(*(args + (task,))) @@ -185,7 +191,7 @@ def stage( return coro -def mutator_stage(func: Callable[[Unpack[A], T], R]): +def mutator_stage(func: Callable[[Unpack[Args], Task], R]): """Decorate a function that manipulates items in a coroutine to become a simple stage. @@ -200,7 +206,7 @@ def mutator_stage(func: Callable[[Unpack[A], T], R]): [{'x': True}, {'a': False, 'x': True}] """ - def coro(*args: Unpack[A]) -> Generator[Union[T, None], T, None]: + def coro(*args: Unpack[Args]) -> Generator[Task | None, Task, None]: task = None while True: task = yield task @@ -419,9 +425,7 @@ class Pipeline: for i in range(1, queue_count): for coro in self.stages[i]: threads.append( - MiddlePipelineThread( - coro, queues[i - 1], queues[i], threads - ) + MiddlePipelineThread(coro, queues[i - 1], queues[i], threads) ) # Last stage. From ebe88885c58cdc80be3283f537359ba8702b8cf1 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 10 Feb 2025 11:46:50 +0100 Subject: [PATCH 27/48] Renamed to ignore_bytes and reverted typevar rename --- beets/util/__init__.py | 6 +++--- beets/util/pipeline.py | 23 +++++++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index a0053c260..b882ed626 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -210,7 +210,7 @@ def sorted_walk( """ # Make sure the paths aren't Unicode strings. bytes_path = bytestring_path(path) - _ignore = [ # rename prevents mypy variable shadowing issue + ignore_bytes = [ # rename prevents mypy variable shadowing issue bytestring_path(i) for i in ignore ] @@ -232,7 +232,7 @@ def sorted_walk( # Skip ignored filenames. skip = False - for pat in _ignore: + for pat in ignore_bytes: if fnmatch.fnmatch(base, pat): if logger: logger.debug( @@ -259,7 +259,7 @@ def sorted_walk( # Recurse into directories. for base in dirs: cur = os.path.join(bytes_path, base) - yield from sorted_walk(cur, _ignore, ignore_hidden, logger) + yield from sorted_walk(cur, ignore_bytes, ignore_hidden, logger) def path_as_posix(path: bytes) -> bytes: diff --git a/beets/util/pipeline.py b/beets/util/pipeline.py index 3a31175b6..8f7be8194 100644 --- a/beets/util/pipeline.py +++ b/beets/util/pipeline.py @@ -30,6 +30,7 @@ up a bottleneck stage by dividing its work among multiple threads. To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. 
""" + from __future__ import annotations import queue @@ -153,10 +154,10 @@ def multiple(messages): return MultiMessage(messages) -# Arguments of the function (omitting the task) -Args = TypeVarTuple("Args") -# Task as an additional argument to the function -Task = TypeVar("Task") +A = TypeVarTuple("A") # Arguments of a function (omitting the task) +T = TypeVar("T") # Type of the task +# Normally these are concatenated i.e. (*args, task) + # Return type of the function (should normally be task but sadly # we cant enforce this with the current stage functions without # a refactor) @@ -165,7 +166,7 @@ R = TypeVar("R") def stage( func: Callable[ - [Unpack[Args], Task], + [Unpack[A], T], R | None, ], ): @@ -182,8 +183,8 @@ def stage( [3, 4, 5] """ - def coro(*args: Unpack[Args]) -> Generator[R | Task | None, Task, None]: - task: R | Task | None = None + def coro(*args: Unpack[A]) -> Generator[R | T | None, T, None]: + task: R | T | None = None while True: task = yield task task = func(*(args + (task,))) @@ -191,7 +192,7 @@ def stage( return coro -def mutator_stage(func: Callable[[Unpack[Args], Task], R]): +def mutator_stage(func: Callable[[Unpack[A], T], R]): """Decorate a function that manipulates items in a coroutine to become a simple stage. @@ -206,7 +207,7 @@ def mutator_stage(func: Callable[[Unpack[Args], Task], R]): [{'x': True}, {'a': False, 'x': True}] """ - def coro(*args: Unpack[Args]) -> Generator[Task | None, Task, None]: + def coro(*args: Unpack[A]) -> Generator[T | None, T, None]: task = None while True: task = yield task @@ -425,7 +426,9 @@ class Pipeline: for i in range(1, queue_count): for coro in self.stages[i]: threads.append( - MiddlePipelineThread(coro, queues[i - 1], queues[i], threads) + MiddlePipelineThread( + coro, queues[i - 1], queues[i], threads + ) ) # Last stage. From 459ca6498a77f1ad91c3ac7a741642e73afde873 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Tue, 4 Feb 2025 07:15:25 +0100 Subject: [PATCH 28/48] Refactor musicbrainzngs fetch-includes sanity checks in autotag.mb module. 
--- beets/autotag/mb.py | 56 ++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 19d26a9f6..6c2b604cd 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -74,22 +74,38 @@ class MusicBrainzAPIError(util.HumanReadableError): log = logging.getLogger("beets") -RELEASE_INCLUDES = [ - "artists", - "media", - "recordings", - "release-groups", - "labels", - "artist-credits", - "aliases", - "recording-level-rels", - "work-rels", - "work-level-rels", - "artist-rels", - "isrcs", - "url-rels", - "release-rels", -] +RELEASE_INCLUDES = list( + { + "artists", + "media", + "recordings", + "release-groups", + "labels", + "artist-credits", + "aliases", + "recording-level-rels", + "work-rels", + "work-level-rels", + "artist-rels", + "isrcs", + "url-rels", + "release-rels", + "tags", + } + & set(musicbrainzngs.VALID_INCLUDES["release"]) +) + +TRACK_INCLUDES = list( + { + "artists", + "aliases", + "isrcs", + "work-level-rels", + "artist-rels", + } + & set(musicbrainzngs.VALID_INCLUDES["recording"]) +) + BROWSE_INCLUDES = [ "artist-credits", "work-rels", @@ -101,14 +117,6 @@ if "work-level-rels" in musicbrainzngs.VALID_BROWSE_INCLUDES["recording"]: BROWSE_INCLUDES.append("work-level-rels") BROWSE_CHUNKSIZE = 100 BROWSE_MAXTRACKS = 500 -TRACK_INCLUDES = ["artists", "aliases", "isrcs"] -if "work-level-rels" in musicbrainzngs.VALID_INCLUDES["recording"]: - TRACK_INCLUDES += ["work-level-rels", "artist-rels"] -if "tags" in [ - musicbrainzngs.VALID_INCLUDES["release"], - musicbrainzngs.VALID_INCLUDES["release-group"], -]: - RELEASE_INCLUDES += ["tags"] def track_url(trackid: str) -> str: From 803a0d1fcb456e97ddd4a9b6521ae2c348377e24 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr <39738318+semohr@users.noreply.github.com> Date: Sat, 15 Feb 2025 13:17:00 +0100 Subject: [PATCH 29/48] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Šarūnas Nejus --- beets/importer.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 6992adbbb..ebffe14d8 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -202,7 +202,7 @@ class ImportSession: """ logger: logging.Logger - paths: list[bytes] | None = None + paths: list[PathBytes] lib: library.Library _is_resuming: dict[bytes, bool] @@ -226,7 +226,7 @@ class ImportSession: A logging handler to use for the session's logger. If None, a NullHandler will be used. paths : os.PathLike or None - The paths to be imported. If None, no paths are specified. + The paths to be imported. query : dbcore.Query or None A query to filter items for import. If None, no query is applied. """ @@ -238,8 +238,7 @@ class ImportSession: self._merged_dirs = set() # Normalize the paths. 
- if paths is not None: - self.paths = list(map(normpath, paths)) + self.paths = list(map(normpath, paths or [])) def _setup_logging(self, loghandler: logging.Handler | None): logger = logging.getLogger(__name__) @@ -329,9 +328,7 @@ class ImportSession: self.tag_log("skip", paths) def should_resume(self, path: PathBytes): - raise NotImplementedError( - "Inheriting class must implement `should_resume`" - ) + raise NotImplementedError def choose_match(self, task: ImportTask): raise NotImplementedError( @@ -637,10 +634,6 @@ class ImportTask(BaseImportTask): def apply_metadata(self): """Copy metadata from match info to the items.""" - assert isinstance( - self.match, autotag.AlbumMatch - ), "apply_metadata() only works for albums" - if config["import"]["from_scratch"]: for item in self.match.mapping: item.clear() @@ -1073,7 +1066,6 @@ class SingletonImportTask(ImportTask): return dict(self.item) elif self.choice_flag is action.APPLY and self.match: return self.match.info.copy() - assert False def imported_items(self): return [self.item] @@ -1179,7 +1171,7 @@ class SentinelImportTask(ImportTask): ImportState().progress_reset(self.toppath) elif self.toppath: # "Directory progress" sentinel for singletons - ImportState().progress_add(self.toppath, *self.paths) + super().save_progress() @property def skip(self) -> bool: @@ -1272,16 +1264,11 @@ class ArchiveImportTask(SentinelImportTask): """ assert self.toppath is not None, "toppath must be set" - handler_class = None - for path_test, handler_class in self.handlers(): + for path_test, handler_class in self.handlers(): if path_test(os.fsdecode(self.toppath)): break - - if handler_class is None: - raise ValueError( - "No handler found for archive: {0}".format(self.toppath) - ) - + else: + raise ValueError(f"No handler found for archive: {self.toppath}") extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") try: From d29ef500b95b2fc383c9833b36a380e6ad019bca Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 15 Feb 2025 13:13:55 +0100 Subject: [PATCH 30/48] Session path can't be None anymore. Empty list if none --- beets/importer.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index ebffe14d8..46fe8977a 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1405,16 +1405,12 @@ class ImportTaskFactory: `dirs` is a list of parent directories used to record already imported albums. """ - if not paths: - return None if dirs is None: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug( - "Skipping previously-imported path: {0}", displayable_path(dirs) - ) + log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) self.skipped += 1 return None @@ -1422,7 +1418,7 @@ class ImportTaskFactory: item for item in map(self.read_item, paths) if item ] - if items: + if len(items) > 0: return ImportTask(self.toppath, dirs, items) else: return None @@ -1511,9 +1507,6 @@ def read_tasks(session: ImportSession): import, yields single-item tasks instead. """ skipped = 0 - if session.paths is None: - log.warning("No path specified in session.") - return for toppath in session.paths: # Check whether we need to resume the import. 
From 0ee17c0b06eb2aa46ff9be576dacd575634e16e1 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sat, 15 Feb 2025 13:16:26 +0100
Subject: [PATCH 31/48] Removed unnecessary comments & raised the same
 NotImplementedError for all unimplemented methods

---
 beets/importer.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/beets/importer.py b/beets/importer.py
index 46fe8977a..63939698a 100644
--- a/beets/importer.py
+++ b/beets/importer.py
@@ -228,7 +228,7 @@ class ImportSession:
         paths : os.PathLike or None
             The paths to be imported.
         query : dbcore.Query or None
-            A query to filter items for import. If None, no query is applied.
+            A query to filter items for import.
         """
         self.lib = lib
         self.logger = self._setup_logging(loghandler)
@@ -331,19 +331,13 @@ class ImportSession:
             raise NotImplementedError
 
     def choose_match(self, task: ImportTask):
-        raise NotImplementedError(
-            "Inheriting class must implement `choose_match`"
-        )
+        raise NotImplementedError
 
     def resolve_duplicate(self, task: ImportTask, found_duplicates):
-        raise NotImplementedError(
-            "Inheriting class must implement `resolve_duplicate`"
-        )
-
+        raise NotImplementedError
+    
     def choose_item(self, task: ImportTask):
-        raise NotImplementedError(
-            "Inheriting class must implement `choose_item`"
-        )
+        raise NotImplementedError
 
     def run(self):
         """Run the import task."""

From 9acb2b4ca3f1747bcafe6e0a793aec5e4fde4553 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr
Date: Sat, 15 Feb 2025 13:20:36 +0100
Subject: [PATCH 32/48] Added typehints to ImportTask init and reverted some
 initial changes.

---
 beets/importer.py | 51 ++++++++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/beets/importer.py b/beets/importer.py
index 63939698a..a3d98466c 100644
--- a/beets/importer.py
+++ b/beets/importer.py
@@ -29,7 +29,7 @@ from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum
 from tempfile import mkdtemp
-from typing import Callable, Iterable, Sequence, Union, cast
+from typing import Callable, Iterable, Sequence
 
 import mediafile
 
@@ -335,7 +335,7 @@ class ImportSession:
     def resolve_duplicate(self, task: ImportTask, found_duplicates):
         raise NotImplementedError
-    
+
     def choose_item(self, task: ImportTask):
         raise NotImplementedError
 
@@ -407,7 +407,8 @@ class ImportSession:
     _history_dirs = None
 
     @property
-    def history_dirs(self):
+    def history_dirs(self) -> set[tuple[PathBytes, ...]]:
+        # FIXME: This could be simplified to a cached property
         if self._history_dirs is None:
             self._history_dirs = ImportState().taghistory
         return self._history_dirs
@@ -536,7 +537,12 @@ class ImportTask(BaseImportTask):
     cur_artist: str | None = None
     candidates: Sequence[autotag.AlbumMatch | autotag.TrackMatch] = []
 
-    def __init__(self, toppath, paths, items):
+    def __init__(
+        self,
+        toppath: PathBytes | None,
+        paths: Iterable[PathBytes] | None,
+        items: Iterable[library.Item] | None,
+    ):
         super().__init__(toppath, paths, items)
         self.rec = None
         self.should_remove_duplicates = False
@@ -563,16 +569,12 @@ class ImportTask(BaseImportTask):
             action.ALBUMS,
             action.RETAG,
         ):
-            # Cast needed as mypy can't infer the type
-            self.choice_flag = cast(action, choice)
+            # TODO: redesign to tighten the type
+            self.choice_flag = choice  # type: ignore[assignment]
             self.match = None
         else:
             self.choice_flag = action.APPLY  # Implicit choice.
-            # Union is needed here for python 3.9 compatibility!
- # Cast needed as mypy can't infer the type - self.match = cast( - Union[autotag.AlbumMatch, autotag.TrackMatch], choice - ) + self.match = choice # type: ignore[assignment] def save_progress(self): """Updates the progress state to indicate that this album has @@ -583,8 +585,7 @@ class ImportTask(BaseImportTask): def save_history(self): """Save the directory in the history for incremental imports.""" - if self.paths: - ImportState().history_add(self.paths) + ImportState().history_add(self.paths) # Logical decisions. @@ -821,9 +822,9 @@ class ImportTask(BaseImportTask): def manipulate_files( self, + session: ImportSession, operation: MoveOperation | None = None, write=False, - session: ImportSession | None = None, ): """Copy, move, link, hardlink or reflink (depending on `operation`) the files as well as write metadata. @@ -831,8 +832,8 @@ class ImportTask(BaseImportTask): `operation` should be an instance of `util.MoveOperation`. If `write` is `True` metadata is written to the files. + # TODO: Introduce a MoveOperation.NONE or SKIP """ - assert session is not None, "session must be provided" items = self.imported_items() # Save the original paths of all items for deletion and pruning @@ -1058,7 +1059,7 @@ class SingletonImportTask(ImportTask): assert self.choice_flag in (action.ASIS, action.RETAG, action.APPLY) if self.choice_flag in (action.ASIS, action.RETAG): return dict(self.item) - elif self.choice_flag is action.APPLY and self.match: + elif self.choice_flag is action.APPLY: return self.match.info.copy() def imported_items(self): @@ -1258,10 +1259,10 @@ class ArchiveImportTask(SentinelImportTask): """ assert self.toppath is not None, "toppath must be set" - for path_test, handler_class in self.handlers(): + for path_test, handler_class in self.handlers(): if path_test(os.fsdecode(self.toppath)): break - else: + else: raise ValueError(f"No handler found for archive: {self.toppath}") extract_to = mkdtemp() archive = handler_class(os.fsdecode(self.toppath), mode="r") @@ -1404,7 +1405,9 @@ class ImportTaskFactory: dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug("Skipping previously-imported path: {0}", displayable_path(dirs)) + log.debug( + "Skipping previously-imported path: {0}", displayable_path(dirs) + ) self.skipped += 1 return None @@ -1527,9 +1530,9 @@ def query_tasks(session: ImportSession): if session.config["singletons"]: # Search for items. for item in session.lib.items(session.query): - singleton_task = SingletonImportTask(None, item) - for task in singleton_task.handle_created(session): - yield singleton_task + task = SingletonImportTask(None, item) + for task in task.handle_created(session): + yield task else: # Search for albums. 
@@ -1604,9 +1607,7 @@ def user_query(session: ImportSession, task: ImportTask): yield SentinelImportTask(task.toppath, task.paths) return _extend_pipeline( - emitter(task), - lookup_candidates(session), - user_query(session), + emitter(task), lookup_candidates(session), user_query(session) ) # As albums: group items by albums and create task for each album From 0447df65108a2ba7245454033698c971ed9a7d66 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Sat, 15 Feb 2025 13:27:45 +0100 Subject: [PATCH 33/48] Session args to kwargs in manipulate files --- beets/importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index a3d98466c..79935e215 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1772,9 +1772,9 @@ def manipulate_files(session: ImportSession, task: ImportTask): operation = None task.manipulate_files( - operation, - write=session.config["write"], session=session, + operation=operation, + write=session.config["write"], ) # Progress, cleanup, and event. From 5a9c769f5cd413eb4f92d4e2d91ed9d3067456fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 1 Feb 2025 15:59:20 +0000 Subject: [PATCH 34/48] Link to specific bug report/feature request templates in docs --- docs/faq.rst | 18 ++++++++++-------- docs/guides/tagger.rst | 3 ++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index b740c6503..ac7818ab2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -174,9 +174,8 @@ pages. …report a bug in beets? ----------------------- -We use the `issue tracker `__ -on GitHub. `Enter a new issue `__ -there to report a bug. Please follow these guidelines when reporting an issue: +We use the `issue tracker`_ on GitHub where you can `open a new ticket`_. +Please follow these guidelines when reporting an issue: - Most importantly: if beets is crashing, please `include the traceback `__. Tracebacks can be more @@ -206,6 +205,7 @@ If you've never reported a bug before, Mozilla has some well-written `general guidelines for good bug reports`_. +.. _issue tracker: https://github.com/beetbox/beets/issues .. _general guidelines for good bug reports: https://developer.mozilla.org/en-US/docs/Mozilla/QA/Bug_writing_guidelines @@ -343,11 +343,11 @@ read the file. You can also use specialized programs for checking file integrity---for example, type ``metaflac --list music.flac`` to check FLAC files. -If beets still complains about a file that seems to be valid, `file a -bug `__ and we'll look into -it. There's always a possibility that there's a bug "upstream" in the -`Mutagen `__ library used by beets, -in which case we'll forward the bug to that project's tracker. +If beets still complains about a file that seems to be valid, `open a new +ticket`_ and we'll look into it. There's always a possibility that there's +a bug "upstream" in the `Mutagen `__ +library used by beets, in which case we'll forward the bug to that project's +tracker. .. _importhang: @@ -398,3 +398,5 @@ try `this Super User answer`_. .. _this Super User answer: https://superuser.com/a/284361/4569 .. _pip: https://pip.pypa.io/en/stable/ +.. _open a new ticket: + https://github.com/beetbox/beets/issues/new?template=bug-report.md diff --git a/docs/guides/tagger.rst b/docs/guides/tagger.rst index 68ad908e8..bf1ecbd8a 100644 --- a/docs/guides/tagger.rst +++ b/docs/guides/tagger.rst @@ -76,7 +76,8 @@ all of these limitations. Musepack, Windows Media, Opus, and AIFF files are supported. 
(Do you use some other format? Please `file a feature request`_!) -.. _file a feature request: https://github.com/beetbox/beets/issues/new +.. _file a feature request: + https://github.com/beetbox/beets/issues/new?template=feature-request.md Now that that's out of the way, let's tag some music. From bdabafeefdee6c488694d780100d3e8756573b24 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 17 Feb 2025 15:48:11 +0100 Subject: [PATCH 35/48] See https://github.com/beetbox/beets/pull/5611#discussion_r1957443316 --- beets/importer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 79935e215..758e1d30b 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1341,10 +1341,7 @@ class ImportTaskFactory: # it is finished. This is usually just a SentinelImportTask, but # for archive imports, send the archive task instead (to remove # the extracted directory). - if archive_task: - yield archive_task - else: - yield self.sentinel() + yield archive_task or self.sentinel() def _create(self, task: ImportTask | None): """Handle a new task to be emitted by the factory. From 87b022e48cd530e26287a6b8d0a810ca5ae5ac28 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 17 Feb 2025 16:09:18 +0100 Subject: [PATCH 36/48] See https://github.com/beetbox/beets/pull/5611#discussion_r1957443692 --- beets/importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 758e1d30b..4d796982f 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -182,7 +182,7 @@ class ImportState: """ return toppath in self.tagprogress - def progress_reset(self, toppath: PathBytes): + def progress_reset(self, toppath: PathBytes | None): """Reset the progress for `toppath`.""" with self as state: if toppath in state.tagprogress: @@ -1161,7 +1161,7 @@ class SentinelImportTask(ImportTask): pass def save_progress(self): - if self.paths is None and self.toppath: + if not self.paths: # "Done" sentinel. ImportState().progress_reset(self.toppath) elif self.toppath: From cf1b4d8913d256cf2e43e17730109516fd10b7b6 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 17 Feb 2025 16:25:41 +0100 Subject: [PATCH 37/48] See https://github.com/beetbox/beets/pull/5611#discussion_r1958356845. Revert normpath additions --- beets/importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 4d796982f..3553e1ea0 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -436,7 +436,7 @@ class ImportSession: You have to call `ask_resume` first to determine the return value. """ - return self._is_resuming.get(normpath(toppath), False) + return self._is_resuming.get(toppath, False) def ask_resume(self, toppath: PathBytes): """If import of `toppath` was aborted in an earlier session, ask @@ -449,9 +449,9 @@ class ImportSession: if self.want_resume is True or self.should_resume(toppath): log.warning( "Resuming interrupted import of {0}", - util.displayable_path(normpath(toppath)), + util.displayable_path(toppath), ) - self._is_resuming[normpath(toppath)] = True + self._is_resuming[toppath] = True else: # Clear progress; we're starting from the top. 
ImportState().progress_reset(toppath) From 9683a5b9562ab18cfe73e3ec9e834f23ed578574 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr Date: Mon, 17 Feb 2025 16:30:42 +0100 Subject: [PATCH 38/48] See https://github.com/beetbox/beets/pull/5611#discussion_r1957433785 --- beets/importer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 3553e1ea0..2bdb16669 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1066,9 +1066,6 @@ class SingletonImportTask(ImportTask): return [self.item] def apply_metadata(self): - assert isinstance( - self.match, autotag.TrackMatch - ), "apply_metadata() only works for tracks" autotag.apply_item_metadata(self.item, self.match.info) def _emit_imported(self, lib): From d7201062a8f9e323e2efe36c5365adaa40ab9e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 13 Oct 2024 23:42:55 +0100 Subject: [PATCH 39/48] Resurrect translation functionality --- beetsplug/_typing.py | 20 ++++ beetsplug/lyrics.py | 183 ++++++++++++++++++++++++------------ docs/changelog.rst | 2 + docs/plugins/lyrics.rst | 51 +++++++--- test/plugins/test_lyrics.py | 85 +++++++++++++++++ 5 files changed, 268 insertions(+), 73 deletions(-) diff --git a/beetsplug/_typing.py b/beetsplug/_typing.py index 915ea77e8..b772ffdd5 100644 --- a/beetsplug/_typing.py +++ b/beetsplug/_typing.py @@ -113,3 +113,23 @@ class GoogleCustomSearchAPI: """Pagemap data with a single meta tags dict in a list.""" metatags: list[JSONDict] + + +class TranslatorAPI: + class Language(TypedDict): + """Language data returned by the translator API.""" + + language: str + score: float + + class Translation(TypedDict): + """Translation data returned by the translator API.""" + + text: str + to: str + + class Response(TypedDict): + """Response from the translator API.""" + + detectedLanguage: TranslatorAPI.Language + translations: list[TranslatorAPI.Translation] diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 1732edbf7..9b7f39e5b 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -40,10 +40,18 @@ from beets import plugins, ui from beets.autotag.hooks import string_dist if TYPE_CHECKING: + from logging import Logger + from beets.importer import ImportTask from beets.library import Item - from ._typing import GeniusAPI, GoogleCustomSearchAPI, JSONDict, LRCLibAPI + from ._typing import ( + GeniusAPI, + GoogleCustomSearchAPI, + JSONDict, + LRCLibAPI, + TranslatorAPI, + ) USER_AGENT = f"beets/{beets.__version__}" INSTRUMENTAL_LYRICS = "[Instrumental]" @@ -252,6 +260,12 @@ class RequestHandler: self.debug("Fetching JSON from {}", url) return r_session.get(url, **kwargs).json() + def post_json(self, url: str, params: JSONDict | None = None, **kwargs): + """Send POST request and return JSON response.""" + url = self.format_url(url, params) + self.debug("Posting JSON to {}", url) + return r_session.post(url, **kwargs).json() + @contextmanager def handle_request(self) -> Iterator[None]: try: @@ -760,6 +774,97 @@ class Google(SearchBackend): return None +@dataclass +class Translator(RequestHandler): + TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" + LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + + _log: Logger + api_key: str + to_language: str + from_languages: list[str] + + @classmethod + def from_config( + cls, + log: Logger, + api_key: str, + to_language: str, + from_languages: list[str] | None = None, + ) -> Translator: + return cls( + log, + api_key, + to_language.upper(), + [x.upper() for x in 
from_languages or []],
+        )
+
+    def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
+        """Return translations for the given texts.
+
+        To reduce the translation 'cost', we translate unique texts, and then
+        map the translations back to the original texts.
+        """
+        unique_texts = list(dict.fromkeys(texts))
+        data: list[TranslatorAPI.Response] = self.post_json(
+            self.TRANSLATE_URL,
+            headers={"Ocp-Apim-Subscription-Key": self.api_key},
+            json=[{"text": "|".join(unique_texts)}],
+            params={"api-version": "3.0", "to": self.to_language},
+        )
+
+        translations = data[0]["translations"][0]["text"].split("|")
+        trans_by_text = dict(zip(unique_texts, translations))
+        return list(zip(texts, (trans_by_text.get(t, "") for t in texts)))
+
+    @classmethod
+    def split_line(cls, line: str) -> tuple[str, str]:
+        """Split a line into (timestamp, text)."""
+        if m := cls.LINE_PARTS_RE.match(line):
+            return m[1], m[2]
+
+        return "", ""
+
+    def append_translations(self, lines: Iterable[str]) -> list[str]:
+        """Append translations to the given lyrics texts.
+
+        Lines may contain timestamps from LRCLib which need to be temporarily
+        removed for the translation. They can take any of these forms:
+        - empty
+        Text - text only
+        [00:00:00] - timestamp only
+        [00:00:00] Text - timestamp with text
+        """
+        # split into [(timestamp, text), ...]
+        ts_and_text = list(map(self.split_line, lines))
+        timestamps = [ts for ts, _ in ts_and_text]
+        text_pairs = self.get_translations([ln for _, ln in ts_and_text])
+
+        # only add the separator for non-empty translations
+        texts = [" / ".join(filter(None, p)) for p in text_pairs]
+        # only add the space between non-empty timestamps and texts
+        return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]
+
+    def translate(self, lyrics: str) -> str:
+        """Translate the given lyrics to the target language.
+
+        If the lyrics are already in the target language or not in any of
+        the source languages (if configured), they are returned as is.
+
+        The footer with the source URL is preserved, if present.
+        """
+        lyrics_language = langdetect.detect(lyrics).upper()
+        if lyrics_language == self.to_language or (
+            self.from_languages and lyrics_language not in self.from_languages
+        ):
+            return lyrics
+
+        lyrics, *url = lyrics.split("\n\nSource: ")
+        with self.handle_request():
+            translated_lines = self.append_translations(lyrics.splitlines())
+            return "\n\nSource: ".join(["\n".join(translated_lines), *url])
+
+
 class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
     BACKEND_BY_NAME = {
         b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
@@ -776,15 +881,24 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
 
         return [self.BACKEND_BY_NAME[c](self.config, self._log) for c in chosen]
 
+    @cached_property
+    def translator(self) -> Translator | None:
+        config = self.config["translate"]
+        if config["api_key"].get() and config["to_language"].get():
+            return Translator.from_config(self._log, **config.flatten())
+        return None
+
     def __init__(self):
         super().__init__()
         self.import_stages = [self.imported]
         self.config.add(
             {
                 "auto": True,
-                "bing_client_secret": None,
-                "bing_lang_from": [],
-                "bing_lang_to": None,
+                "translate": {
+                    "api_key": None,
+                    "from_languages": [],
+                    "to_language": None,
+                },
                 "dist_thresh": 0.11,
                 "google_API_key": None,
                 "google_engine_ID": "009217259823014548361:lndtuqkycfu",
@@ -803,7 +917,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
                 ],
             }
         )
-        self.config["bing_client_secret"].redact = True
+        self.config["translate"]["api_key"].redact = True
         self.config["google_API_key"].redact = True
         self.config["google_engine_ID"].redact = True
         self.config["genius_api_key"].redact = True
@@ -817,24 +931,6 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
         # open yet.
         self.rest = None
 
-        self.config["bing_lang_from"] = [
-            x.lower() for x in self.config["bing_lang_from"].as_str_seq()
-        ]
-
-    @cached_property
-    def bing_access_token(self) -> str | None:
-        params = {
-            "client_id": "beets",
-            "client_secret": self.config["bing_client_secret"],
-            "scope": "https://api.microsofttranslator.com",
-            "grant_type": "client_credentials",
-        }
-
-        oauth_url = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13"
-        with self.handle_request():
-            r = r_session.post(oauth_url, params=params)
-            return r.json()["access_token"]
-
     def commands(self):
         cmd = ui.Subcommand("lyrics", help="fetch song lyrics")
         cmd.parser.add_option(
@@ -996,14 +1092,12 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
 
         if lyrics:
             self.info("🟢 Found lyrics: {0}", item)
-            if self.config["bing_client_secret"].get():
-                lang_from = langdetect.detect(lyrics)
-                if self.config["bing_lang_to"].get() != lang_from and (
-                    not self.config["bing_lang_from"]
-                    or (lang_from in self.config["bing_lang_from"].as_str_seq())
-                ):
-                    lyrics = self.append_translation(
-                        lyrics, self.config["bing_lang_to"]
+            if translator := self.translator:
+                initial_lyrics = lyrics
+                if (lyrics := translator.translate(lyrics)) != initial_lyrics:
+                    self.info(
+                        "🟢 Added translation to {}",
+                        self.config["translate"]["to_language"].get().upper(),
                     )
         else:
             self.info("🔴 Lyrics not found: {}", item)
@@ -1027,30 +1121,3 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
 
             return f"{lyrics}\n\nSource: {url}"
 
         return None
-
-    def append_translation(self, text, to_lang):
-        from xml.etree import ElementTree
-
-        if not (token := self.bing_access_token):
-            self.warn(
-                "Could not get Bing Translate API access token. "
-                "Check your 'bing_client_secret' password."
-            )
-            return text
-
-        # Extract unique lines to limit API request size per song
-        lines = text.split("\n")
-        unique_lines = set(lines)
-        url = "https://api.microsofttranslator.com/v2/Http.svc/Translate"
-        with self.handle_request():
-            text = self.fetch_text(
-                url,
-                headers={"Authorization": f"Bearer {token}"},
-                params={"text": "|".join(unique_lines), "to": to_lang},
-            )
-        if translated := ElementTree.fromstring(text.encode("utf-8")).text:
-            # Use a translation mapping dict to build resulting lyrics
-            translations = dict(zip(unique_lines, translated.split("|")))
-            return "".join(f"{ln} / {translations[ln]}\n" for ln in lines)
-
-        return text
diff --git a/docs/changelog.rst b/docs/changelog.rst
index ecf1c01d3..43cdd1255 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -19,6 +19,8 @@ New features:
   control the maximum allowed distance between the lyrics search result and the
   tagged item's artist and title. This is useful for preventing false positives
   when fetching lyrics.
+* :doc:`plugins/lyrics`: Rewrite lyrics translation functionality to use the
+  Azure AI Translator API and add relevant instructions to the documentation.
 
 Bug fixes:
 
diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst
index f034cf47a..3ef7ab89b 100644
--- a/docs/plugins/lyrics.rst
+++ b/docs/plugins/lyrics.rst
@@ -38,9 +38,10 @@ Default configuration:
 
     lyrics:
         auto: yes
-        bing_client_secret: null
-        bing_lang_from: []
-        bing_lang_to: null
+        translate:
+            api_key:
+            from_languages: []
+            to_language:
         dist_thresh: 0.11
         fallback: null
         force: no
@@ -52,12 +53,14 @@ Default configuration:
 The available options are:
 
 - **auto**: Fetch lyrics automatically during import.
-- **bing_client_secret**: Your Bing Translation application password
-  (see :ref:`lyrics-translation`)
-- **bing_lang_from**: By default all lyrics with a language other than
-  ``bing_lang_to`` are translated. Use a list of lang codes to restrict the set
-  of source languages to translate.
-- **bing_lang_to**: Language to translate lyrics into.
+- **translate**:
+
+  - **api_key**: API key to access your Azure Translator resource (see
+    :ref:`lyrics-translation`).
+  - **from_languages**: By default all lyrics with a language other than
+    ``to_language`` are translated. Use a list of language codes to restrict
+    the set of source languages.
+  - **to_language**: Language code to translate lyrics to.
 - **dist_thresh**: The maximum distance between the artist and title
   combination of the music file and lyrics candidate to consider them a match.
   Lower values will make the plugin more strict, higher values will make it
@@ -165,10 +168,28 @@ After that, the lyrics plugin will fall back on other declared data sources.
 Activate On-the-Fly Translation
 -------------------------------
 
-You need to register for a Microsoft Azure Marketplace free account and
-to the `Microsoft Translator API`_. Follow the four steps process, specifically
-at step 3 enter ``beets`` as *Client ID* and copy/paste the generated
-*Client secret* into your ``bing_client_secret`` configuration, alongside
-``bing_lang_to`` target ``language code``.
+We use Azure to optionally translate your lyrics. To set up the integration,
+follow these steps:
 
-.. _Microsoft Translator API: https://docs.microsoft.com/en-us/azure/cognitive-services/translator/translator-how-to-signup
+1. `Create a Translator resource`_ on Azure.
+2. `Obtain its API key`_.
+3. Add the API key to your configuration as ``translate.api_key``.
+4. Configure your target language using the ``translate.to_language`` option.
+ + +For example, with the following configuration + +.. code-block:: yaml + + lyrics: + translate: + api_key: YOUR_TRANSLATOR_API_KEY + to_language: de + +You should expect lyrics like this:: + + Original verse / Ursprünglicher Vers + Some other verse / Ein anderer Vers + +.. _create a Translator resource: https://learn.microsoft.com/en-us/azure/ai-services/translator/create-translator-resource +.. _obtain its API key: https://learn.microsoft.com/en-us/python/api/overview/azure/ai-translation-text-readme?view=azure-python&preserve-view=true#get-an-api-key diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index a3c640109..a1591aa24 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -17,6 +17,7 @@ import importlib.util import os import re +import textwrap from functools import partial from http import HTTPStatus @@ -509,3 +510,87 @@ class TestLRCLibLyrics(LyricsBackendTest): lyrics, _ = fetch_lyrics() assert lyrics == expected_lyrics + + +class TestTranslation: + @pytest.fixture(autouse=True) + def _patch_bing(self, requests_mock): + def callback(request, _): + if b"Refrain" in request.body: + translations = ( + "" + "|[Refrain : Doja Cat]" + "|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 + "|Mon corps ne me laissait pas le cacher (Cachez-le)" + "|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 + "|Chevauchant à travers le tonnerre, la foudre" + ) + elif b"00:00.00" in request.body: + translations = ( + "" + "|[00:00.00] Quelques paroles synchronisées" + "|[00:01.00] Quelques paroles plus synchronisées" + ) + else: + translations = ( + "" + "|Quelques paroles synchronisées" + "|Quelques paroles plus synchronisées" + ) + + return [ + { + "detectedLanguage": {"language": "en", "score": 1.0}, + "translations": [{"text": translations, "to": "fr"}], + } + ] + + requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback) + + @pytest.mark.parametrize( + "initial_lyrics, expected", + [ + pytest.param( + """ + [Refrain: Doja Cat] + Hard for me to let you go (Let you go, let you go) + My body wouldn't let me hide it (Hide it) + No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) + Ridin' through the thunder, lightnin'""", + """ + [Refrain: Doja Cat] / [Refrain : Doja Cat] + Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) + My body wouldn't let me hide it (Hide it) / Mon corps ne me laissait pas le cacher (Cachez-le) + No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) / Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas) + Ridin' through the thunder, lightnin' / Chevauchant à travers le tonnerre, la foudre""", # noqa: E501 + id="plain", + ), + pytest.param( + """ + [00:00.00] Some synced lyrics + [00:00:50] + [00:01.00] Some more synced lyrics + + Source: https://lrclib.net/api/123""", + """ + [00:00.00] Some synced lyrics / Quelques paroles synchronisées + [00:00:50] + [00:01.00] Some more synced lyrics / Quelques paroles plus synchronisées + + Source: https://lrclib.net/api/123""", # noqa: E501 + id="synced", + ), + pytest.param( + "Quelques paroles", + "Quelques paroles", + id="already in the target language", + ), + ], + ) + def test_translate(self, initial_lyrics, expected): + plugin = lyrics.LyricsPlugin() + bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) + + assert bing.translate( + textwrap.dedent(initial_lyrics) + ) 
== textwrap.dedent(expected) From c95156adcd656a1ac1a3c34428b2978e0cc55d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 14 Oct 2024 00:04:50 +0100 Subject: [PATCH 40/48] Refactor writing rest files --- beetsplug/lyrics.py | 233 ++++++++++++++++-------------------- docs/plugins/lyrics.rst | 11 +- test/plugins/test_lyrics.py | 49 ++++++++ 3 files changed, 158 insertions(+), 135 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 9b7f39e5b..7e09cc0fe 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -17,16 +17,17 @@ from __future__ import annotations import atexit -import errno import itertools import math -import os.path import re +import textwrap from contextlib import contextmanager, suppress from dataclasses import dataclass from functools import cached_property, partial, total_ordering from html import unescape from http import HTTPStatus +from itertools import groupby +from pathlib import Path from typing import TYPE_CHECKING, Iterable, Iterator, NamedTuple from urllib.parse import quote, quote_plus, urlencode, urlparse @@ -56,41 +57,6 @@ if TYPE_CHECKING: USER_AGENT = f"beets/{beets.__version__}" INSTRUMENTAL_LYRICS = "[Instrumental]" -# The content for the base index.rst generated in ReST mode. -REST_INDEX_TEMPLATE = """Lyrics -====== - -* :ref:`Song index ` -* :ref:`search` - -Artist index: - -.. toctree:: - :maxdepth: 1 - :glob: - - artists/* -""" - -# The content for the base conf.py generated. -REST_CONF_TEMPLATE = """# -*- coding: utf-8 -*- -master_doc = 'index' -project = 'Lyrics' -copyright = 'none' -author = 'Various Authors' -latex_documents = [ - (master_doc, 'Lyrics.tex', project, - author, 'manual'), -] -epub_title = project -epub_author = author -epub_publisher = author -epub_copyright = copyright -epub_exclude_files = ['search.html'] -epub_tocdepth = 1 -epub_tocdup = False -""" - class NotFoundError(requests.exceptions.HTTPError): pass @@ -865,6 +831,97 @@ class Translator(RequestHandler): return "\n\nSource: ".join(["\n".join(translated_lines), *url]) +@dataclass +class RestFiles: + # The content for the base index.rst generated in ReST mode. + REST_INDEX_TEMPLATE = textwrap.dedent(""" + Lyrics + ====== + + * :ref:`Song index ` + * :ref:`search` + + Artist index: + + .. toctree:: + :maxdepth: 1 + :glob: + + artists/* + """).strip() + + # The content for the base conf.py generated. + REST_CONF_TEMPLATE = textwrap.dedent(""" + master_doc = "index" + project = "Lyrics" + copyright = "none" + author = "Various Authors" + latex_documents = [ + (master_doc, "Lyrics.tex", project, author, "manual"), + ] + epub_exclude_files = ["search.html"] + epub_tocdepth = 1 + epub_tocdup = False + """).strip() + + directory: Path + + @cached_property + def artists_dir(self) -> Path: + dir = self.directory / "artists" + dir.mkdir(parents=True, exist_ok=True) + return dir + + def write_indexes(self) -> None: + """Write conf.py and index.rst files necessary for Sphinx + + We write minimal configurations that are necessary for Sphinx + to operate. We do not overwrite existing files so that + customizations are respected.""" + index_file = self.directory / "index.rst" + if not index_file.exists(): + index_file.write_text(self.REST_INDEX_TEMPLATE) + conf_file = self.directory / "conf.py" + if not conf_file.exists(): + conf_file.write_text(self.REST_CONF_TEMPLATE) + + def write_artist(self, artist: str, items: Iterable[Item]) -> None: + parts = [ + f'{artist}\n{"=" * len(artist)}', + ".. 
contents::\n   :local:",
+        ]
+        for album, items in groupby(items, key=lambda i: i.album):
+            parts.append(f'{album}\n{"-" * len(album)}')
+            parts.extend(
+                part
+                for i in items
+                if (title := f":index:`{i.title.strip()}`")
+                for part in (
+                    f'{title}\n{"~" * len(title)}',
+                    textwrap.indent(i.lyrics, "| "),
+                )
+            )
+        file = self.artists_dir / f"{slug(artist)}.rst"
+        file.write_text("\n\n".join(parts).strip())
+
+    def write(self, items: list[Item]) -> None:
+        self.directory.mkdir(exist_ok=True, parents=True)
+        self.write_indexes()
+
+        items.sort(key=lambda i: i.albumartist)
+        for artist, artist_items in groupby(items, key=lambda i: i.albumartist):
+            self.write_artist(artist.strip(), artist_items)
+
+        d = self.directory
+        text = f"""
+        ReST files generated. To build, use one of:
+          sphinx-build -b html {d} {d/"html"}
+          sphinx-build -b epub {d} {d/"epub"}
+          sphinx-build -b latex {d} {d/"latex"} && make -C {d/"latex"} all-pdf
+        """
+        ui.print_(textwrap.dedent(text))
+
+
 class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
     BACKEND_BY_NAME = {
         b.name: b for b in [LRCLib, Google, Genius, Tekstowo, MusiXmatch]
@@ -922,15 +979,6 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
         self.config["google_engine_ID"].redact = True
         self.config["genius_api_key"].redact = True
 
-        # State information for the ReST writer.
-        # First, the current artist we're writing.
-        self.artist = "Unknown artist"
-        # The current album: False means no album yet.
-        self.album = False
-        # The current rest file content. None means the file is not
-        # open yet.
-        self.rest = None
-
     def commands(self):
         cmd = ui.Subcommand("lyrics", help="fetch song lyrics")
         cmd.parser.add_option(
@@ -944,7 +992,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
         cmd.parser.add_option(
             "-r",
             "--write-rest",
-            dest="writerest",
+            dest="rest_directory",
             action="store",
             default=None,
             metavar="dir",
@@ -970,99 +1018,26 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
         def func(lib, opts, args):
             # The "write to files" option corresponds to the
             # import_write config value.
-            write = ui.should_write()
-            if opts.writerest:
-                self.writerest_indexes(opts.writerest)
-            items = lib.items(ui.decargs(args))
+            items = list(lib.items(ui.decargs(args)))
             for item in items:
                 if not opts.local_only and not self.config["local"]:
                     self.fetch_item_lyrics(
-                        item, write, opts.force_refetch or self.config["force"]
+                        item,
+                        ui.should_write(),
+                        opts.force_refetch or self.config["force"],
                     )
                 if item.lyrics:
                     if opts.printlyr:
                         ui.print_(item.lyrics)
-                    if opts.writerest:
-                        self.appendrest(opts.writerest, item)
-            if opts.writerest and items:
-                # flush last artist & write to ReST
-                self.writerest(opts.writerest)
-                ui.print_("ReST files generated. to build, use one of:")
-                ui.print_(
-                    " sphinx-build -b html %s _build/html" % opts.writerest
-                )
-                ui.print_(
-                    " sphinx-build -b epub %s _build/epub" % opts.writerest
-                )
-                ui.print_(
-                    (
-                        " sphinx-build -b latex %s _build/latex "
-                        "&& make -C _build/latex all-pdf"
-                    )
-                    % opts.writerest
-                )
+
+            if opts.rest_directory and (
+                items := [i for i in items if i.lyrics]
+            ):
+                RestFiles(Path(opts.rest_directory)).write(items)
 
         cmd.func = func
         return [cmd]
 
-    def appendrest(self, directory, item):
-        """Append the item to an ReST file
-
-        This will keep state (in the `rest` variable) in order to avoid
-        writing continuously to the same files.
- """ - - if slug(self.artist) != slug(item.albumartist): - # Write current file and start a new one ~ item.albumartist - self.writerest(directory) - self.artist = item.albumartist.strip() - self.rest = "%s\n%s\n\n.. contents::\n :local:\n\n" % ( - self.artist, - "=" * len(self.artist), - ) - - if self.album != item.album: - tmpalbum = self.album = item.album.strip() - if self.album == "": - tmpalbum = "Unknown album" - self.rest += "{}\n{}\n\n".format(tmpalbum, "-" * len(tmpalbum)) - title_str = ":index:`%s`" % item.title.strip() - block = "| " + item.lyrics.replace("\n", "\n| ") - self.rest += "{}\n{}\n\n{}\n\n".format( - title_str, "~" * len(title_str), block - ) - - def writerest(self, directory): - """Write self.rest to a ReST file""" - if self.rest is not None and self.artist is not None: - path = os.path.join( - directory, "artists", slug(self.artist) + ".rst" - ) - with open(path, "wb") as output: - output.write(self.rest.encode("utf-8")) - - def writerest_indexes(self, directory): - """Write conf.py and index.rst files necessary for Sphinx - - We write minimal configurations that are necessary for Sphinx - to operate. We do not overwrite existing files so that - customizations are respected.""" - try: - os.makedirs(os.path.join(directory, "artists")) - except OSError as e: - if e.errno == errno.EEXIST: - pass - else: - raise - indexfile = os.path.join(directory, "index.rst") - if not os.path.exists(indexfile): - with open(indexfile, "w") as output: - output.write(REST_INDEX_TEMPLATE) - conffile = os.path.join(directory, "conf.py") - if not os.path.exists(conffile): - with open(conffile, "w") as output: - output.write(REST_CONF_TEMPLATE) - def imported(self, _, task: ImportTask) -> None: """Import hook for fetching lyrics automatically.""" if self.config["auto"]: diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 3ef7ab89b..15ddea450 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -107,9 +107,8 @@ Rendering Lyrics into Other Formats ----------------------------------- The ``-r directory, --write-rest directory`` option renders all lyrics as -`reStructuredText`_ (ReST) documents in ``directory`` (by default, the current -directory). That directory, in turn, can be parsed by tools like `Sphinx`_ to -generate HTML, ePUB, or PDF documents. +`reStructuredText`_ (ReST) documents in ``directory``. That directory, in turn, +can be parsed by tools like `Sphinx`_ to generate HTML, ePUB, or PDF documents. Minimal ``conf.py`` and ``index.rst`` files are created the first time the command is run. They are not overwritten on subsequent runs, so you can safely @@ -122,19 +121,19 @@ Sphinx supports various `builders`_, see a few suggestions: :: - sphinx-build -b html . _build/html + sphinx-build -b html /html .. admonition:: Build an ePUB3 formatted file, usable on ebook readers :: - sphinx-build -b epub3 . _build/epub + sphinx-build -b epub3 /epub .. admonition:: Build a PDF file, which incidentally also builds a LaTeX file :: - sphinx-build -b latex %s _build/latex && make -C _build/latex all-pdf + sphinx-build -b latex /latex && make -C /latex all-pdf .. 
_Sphinx: https://www.sphinx-doc.org/ diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index a1591aa24..39d088860 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -20,6 +20,7 @@ import re import textwrap from functools import partial from http import HTTPStatus +from pathlib import Path import pytest @@ -594,3 +595,51 @@ class TestTranslation: assert bing.translate( textwrap.dedent(initial_lyrics) ) == textwrap.dedent(expected) + + +class TestRestFiles: + @pytest.fixture + def rest_dir(self, tmp_path): + return tmp_path + + @pytest.fixture + def rest_files(self, rest_dir): + return lyrics.RestFiles(rest_dir) + + def test_write(self, rest_dir: Path, rest_files): + items = [ + Item(albumartist=aa, album=a, title=t, lyrics=lyr) + for aa, a, t, lyr in [ + ("Artist One", "Album One", "Song One", "Lyrics One"), + ("Artist One", "Album One", "Song Two", "Lyrics Two"), + ("Artist Two", "Album Two", "Song Three", "Lyrics Three"), + ] + ] + + rest_files.write(items) + + assert (rest_dir / "index.rst").exists() + assert (rest_dir / "conf.py").exists() + + artist_one_file = rest_dir / "artists" / "artist-one.rst" + artist_two_file = rest_dir / "artists" / "artist-two.rst" + assert artist_one_file.exists() + assert artist_two_file.exists() + + c = artist_one_file.read_text() + assert ( + c.index("Artist One") + < c.index("Album One") + < c.index("Song One") + < c.index("Lyrics One") + < c.index("Song Two") + < c.index("Lyrics Two") + ) + + c = artist_two_file.read_text() + assert ( + c.index("Artist Two") + < c.index("Album Two") + < c.index("Song Three") + < c.index("Lyrics Three") + ) From 7893766e4cc02fe5965cfacb07f44400ec9c478b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Wed, 23 Oct 2024 13:06:21 +0100 Subject: [PATCH 41/48] Improve flags structure and add tests --- beetsplug/lyrics.py | 73 ++++++++++++++++++------------------- docs/plugins/lyrics.rst | 2 + test/plugins/test_lyrics.py | 40 ++++++++++++++++++-- 3 files changed, 74 insertions(+), 41 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 7e09cc0fe..21a164b8d 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -44,7 +44,7 @@ if TYPE_CHECKING: from logging import Logger from beets.importer import ImportTask - from beets.library import Item + from beets.library import Item, Library from ._typing import ( GeniusAPI, @@ -947,7 +947,6 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): def __init__(self): super().__init__() - self.import_stages = [self.imported] self.config.add( { "auto": True, @@ -966,6 +965,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): "fallback": None, "force": False, "local": False, + "print": False, "synced": False, # Musixmatch is disabled by default as they are currently blocking # requests with the beets user agent. 
@@ -979,14 +979,16 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): self.config["google_engine_ID"].redact = True self.config["genius_api_key"].redact = True + if self.config["auto"]: + self.import_stages = [self.imported] + def commands(self): cmd = ui.Subcommand("lyrics", help="fetch song lyrics") cmd.parser.add_option( "-p", "--print", - dest="printlyr", action="store_true", - default=False, + default=self.config["print"].get(), help="print lyrics to console", ) cmd.parser.add_option( @@ -1001,34 +1003,27 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): cmd.parser.add_option( "-f", "--force", - dest="force_refetch", action="store_true", - default=False, + default=self.config["force"].get(), help="always re-download lyrics", ) cmd.parser.add_option( "-l", "--local", - dest="local_only", action="store_true", - default=False, + default=self.config["local"].get(), help="do not fetch missing lyrics", ) - def func(lib, opts, args): + def func(lib: Library, opts, args) -> None: # The "write to files" option corresponds to the # import_write config value. - items = list(lib.items(ui.decargs(args))) + self.config.set(vars(opts)) + items = list(lib.items(args)) for item in items: - if not opts.local_only and not self.config["local"]: - self.fetch_item_lyrics( - item, - ui.should_write(), - opts.force_refetch or self.config["force"], - ) - if item.lyrics: - if opts.printlyr: - ui.print_(item.lyrics) + self.add_item_lyrics(item, ui.should_write()) + if item.lyrics and opts.print: + ui.print_(item.lyrics) if opts.rest_directory and ( items := [i for i in items if i.lyrics] @@ -1040,32 +1035,34 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): def imported(self, _, task: ImportTask) -> None: """Import hook for fetching lyrics automatically.""" - if self.config["auto"]: - for item in task.imported_items(): - self.fetch_item_lyrics(item, False, self.config["force"]) + for item in task.imported_items(): + self.add_item_lyrics(item, False) - def fetch_item_lyrics(self, item: Item, write: bool, force: bool) -> None: + def find_lyrics(self, item: Item) -> str: + album, length = item.album, round(item.length) + matches = ( + [ + lyrics + for t in titles + if (lyrics := self.get_lyrics(a, t, album, length)) + ] + for a, titles in search_pairs(item) + ) + + return "\n\n---\n\n".join(next(filter(None, matches), [])) + + def add_item_lyrics(self, item: Item, write: bool) -> None: """Fetch and store lyrics for a single item. If ``write``, then the lyrics will also be written to the file itself. """ - # Skip if the item already has lyrics. 
- if not force and item.lyrics: + if self.config["local"]: + return + + if not self.config["force"] and item.lyrics: self.info("🔵 Lyrics already present: {}", item) return - lyrics_matches = [] - album, length = item.album, round(item.length) - for artist, titles in search_pairs(item): - lyrics_matches = [ - self.get_lyrics(artist, title, album, length) - for title in titles - ] - if any(lyrics_matches): - break - - lyrics = "\n\n---\n\n".join(filter(None, lyrics_matches)) - - if lyrics: + if lyrics := self.find_lyrics(item): self.info("🟢 Found lyrics: {0}", item) if translator := self.translator: initial_lyrics = lyrics diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 15ddea450..a20f97faf 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -47,6 +47,7 @@ Default configuration: force: no google_API_key: null google_engine_ID: 009217259823014548361:lndtuqkycfu + print: no sources: [lrclib, google, genius, tekstowo] synced: no @@ -75,6 +76,7 @@ The available options are: - **google_engine_ID**: The custom search engine to use. Default: The `beets custom search engine`_, which gathers an updated list of sources known to be scrapeable. +- **print**: Print lyrics to the console. - **sources**: List of sources to search for lyrics. An asterisk ``*`` expands to all available sources. The ``google`` source will be automatically deactivated if no ``google_API_key`` is setup. diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 39d088860..55fc3a30a 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -25,7 +25,7 @@ from pathlib import Path import pytest from beets.library import Item -from beets.test.helper import PluginMixin +from beets.test.helper import PluginMixin, TestHelper from beetsplug import lyrics from .lyrics_pages import LyricsPage, lyrics_pages @@ -42,6 +42,14 @@ PHRASE_BY_TITLE = { } +@pytest.fixture(scope="module") +def helper(): + helper = TestHelper() + helper.setup_beets() + yield helper + helper.teardown_beets() + + class TestLyricsUtils: @pytest.mark.parametrize( "artist, title", @@ -240,6 +248,27 @@ class TestLyricsPlugin(LyricsPluginMixin): assert last_log assert re.search(expected_log_match, last_log, re.I) + @pytest.mark.parametrize( + "plugin_config, found, expected", + [ + ({}, "new", "old"), + ({"force": True}, "new", "new"), + ({"force": True, "local": True}, "new", "old"), + ({"force": True, "fallback": None}, "", "old"), + ({"force": True, "fallback": ""}, "", ""), + ({"force": True, "fallback": "default"}, "", "default"), + ], + ) + def test_overwrite_config( + self, monkeypatch, helper, lyrics_plugin, found, expected + ): + monkeypatch.setattr(lyrics_plugin, "find_lyrics", lambda _: found) + item = helper.create_item(id=1, lyrics="old") + + lyrics_plugin.add_item_lyrics(item, False) + + assert item.lyrics == expected + class LyricsBackendTest(LyricsPluginMixin): @pytest.fixture @@ -289,8 +318,13 @@ class TestLyricsSources(LyricsBackendTest): def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage): """Test parsed lyrics from each of the configured lyrics pages.""" - lyrics_info = lyrics_plugin.get_lyrics( - lyrics_page.artist, lyrics_page.track_title, "", 186 + lyrics_info = lyrics_plugin.find_lyrics( + Item( + artist=lyrics_page.artist, + title=lyrics_page.track_title, + album="", + length=186.0, + ) ) assert lyrics_info From 43032f7bc7be2eccc71be31ccd0c122a04d7b122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 26 Oct 2024 14:50:22 
+0100
Subject: [PATCH 42/48] translations: make sure we do not re-translate

---
 beetsplug/lyrics.py         | 42 ++++++++++++++++++++++++++-----------
 test/plugins/test_lyrics.py | 15 ++++++++++---
 2 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 21a164b8d..d7a29050e 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -744,6 +744,7 @@ class Google(SearchBackend):
 class Translator(RequestHandler):
     TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
     LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
+    remove_translations = partial(re.compile(r" / [^\n]+").sub, "")
 
     _log: Logger
     api_key: str
@@ -811,23 +812,45 @@ class Translator(RequestHandler):
         # only add the space between non-empty timestamps and texts
         return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]
 
-    def translate(self, lyrics: str) -> str:
+    def translate(self, new_lyrics: str, old_lyrics: str) -> str:
         """Translate the given lyrics to the target language.
 
+        Check old lyrics for existing translations and return them if their
+        original text matches the new lyrics. This is to avoid translating
+        the same lyrics multiple times.
+
         If the lyrics are already in the target language or not in any
         of the source languages (if configured), they are returned as is.
 
         The footer with the source URL is preserved, if present.
         """
-        lyrics_language = langdetect.detect(lyrics).upper()
-        if lyrics_language == self.to_language or (
-            self.from_languages and lyrics_language not in self.from_languages
+        if (
+            " / " in old_lyrics
+            and self.remove_translations(old_lyrics) == new_lyrics
         ):
-            return lyrics
+            self.info("🔵 Translations already exist")
+            return old_lyrics
 
-        lyrics, *url = lyrics.split("\n\nSource: ")
+        lyrics_language = langdetect.detect(new_lyrics).upper()
+        if lyrics_language == self.to_language:
+            self.info(
+                "🔵 Lyrics are already in the target language {}",
+                self.to_language,
+            )
+            return new_lyrics
+
+        if self.from_languages and lyrics_language not in self.from_languages:
+            self.info(
+                "🔵 Configuration {} does not permit translating from {}",
+                self.from_languages,
+                lyrics_language,
+            )
+            return new_lyrics
+
+        lyrics, *url = new_lyrics.split("\n\nSource: ")
         with self.handle_request():
             translated_lines = self.append_translations(lyrics.splitlines())
 
+        self.info("🟢 Translated lyrics to {}", self.to_language)
         return "\n\nSource: ".join(["\n".join(translated_lines), *url])
@@ -1065,12 +1088,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
         if lyrics := self.find_lyrics(item):
             self.info("🟢 Found lyrics: {0}", item)
             if translator := self.translator:
-                initial_lyrics = lyrics
-                if (lyrics := translator.translate(lyrics)) != initial_lyrics:
-                    self.info(
-                        "🟢 Added translation to {}",
-                        self.config["translate_to"].get().upper(),
-                    )
+                lyrics = translator.translate(lyrics, item.lyrics)
         else:
             self.info("🔴 Lyrics not found: {}", item)
             lyrics = self.config["fallback"].get()
diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py
index 55fc3a30a..c4daa17ba 100644
--- a/test/plugins/test_lyrics.py
+++ b/test/plugins/test_lyrics.py
@@ -583,7 +583,7 @@ class TestTranslation:
         requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback)
 
     @pytest.mark.parametrize(
-        "initial_lyrics, expected",
+        "new_lyrics, old_lyrics, expected",
         [
             pytest.param(
                 """
                 [Refrain: Doja Cat]
                 Hard for me to let you go (Let you go, let you go)
                 My body wouldn't let me hide it (Hide it)
                 No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
                 Ridin' through 
the thunder, lightnin'""", + "", """ [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) @@ -607,6 +608,7 @@ class TestTranslation: [00:01.00] Some more synced lyrics Source: https://lrclib.net/api/123""", + "", """ [00:00.00] Some synced lyrics / Quelques paroles synchronisées [00:00:50] @@ -617,17 +619,24 @@ class TestTranslation: ), pytest.param( "Quelques paroles", + "", "Quelques paroles", id="already in the target language", ), + pytest.param( + "Some lyrics", + "Some lyrics / Some translation", + "Some lyrics / Some translation", + id="already translated", + ), ], ) - def test_translate(self, initial_lyrics, expected): + def test_translate(self, new_lyrics, old_lyrics, expected): plugin = lyrics.LyricsPlugin() bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) assert bing.translate( - textwrap.dedent(initial_lyrics) + textwrap.dedent(new_lyrics), old_lyrics ) == textwrap.dedent(expected) From b713d72612f752becd6c9834b11e5fdec2654df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 13 Jan 2025 01:48:29 +0000 Subject: [PATCH 43/48] translations: use a more distinctive separator I found that the translator would sometimes replace the pipe character with another symbol (maybe it got confused thinking the character is part of the text?). Added spaces around the pipe to make it more clear that it's definitely the separator. --- beetsplug/lyrics.py | 7 +++++-- test/plugins/test_lyrics.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index d7a29050e..cb48e2424 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -744,6 +744,7 @@ class Google(SearchBackend): class Translator(RequestHandler): TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + SEPARATOR = " | " remove_translations = partial(re.compile(r" / [^\n]+").sub, "") _log: Logger @@ -773,14 +774,16 @@ class Translator(RequestHandler): map the translations back to the original texts. 
""" unique_texts = list(dict.fromkeys(texts)) + text = self.SEPARATOR.join(unique_texts) data: list[TranslatorAPI.Response] = self.post_json( self.TRANSLATE_URL, headers={"Ocp-Apim-Subscription-Key": self.api_key}, - json=[{"text": "|".join(unique_texts)}], + json=[{"text": text}], params={"api-version": "3.0", "to": self.to_language}, ) - translations = data[0]["translations"][0]["text"].split("|") + translated_text = data[0]["translations"][0]["text"] + translations = translated_text.split(self.SEPARATOR) trans_by_text = dict(zip(unique_texts, translations)) return list(zip(texts, (trans_by_text.get(t, "") for t in texts))) diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index c4daa17ba..74e727099 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -554,23 +554,23 @@ class TestTranslation: if b"Refrain" in request.body: translations = ( "" - "|[Refrain : Doja Cat]" - "|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 - "|Mon corps ne me laissait pas le cacher (Cachez-le)" - "|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 - "|Chevauchant à travers le tonnerre, la foudre" + " | [Refrain : Doja Cat]" + " | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501 + " | Mon corps ne me laissait pas le cacher (Cachez-le)" + " | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501 + " | Chevauchant à travers le tonnerre, la foudre" ) elif b"00:00.00" in request.body: translations = ( "" - "|[00:00.00] Quelques paroles synchronisées" - "|[00:01.00] Quelques paroles plus synchronisées" + " | [00:00.00] Quelques paroles synchronisées" + " | [00:01.00] Quelques paroles plus synchronisées" ) else: translations = ( "" - "|Quelques paroles synchronisées" - "|Quelques paroles plus synchronisées" + " | Quelques paroles synchronisées" + " | Quelques paroles plus synchronisées" ) return [ From 4da72cb5f8622e97984199209b7126f5907f53d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 10 Feb 2025 02:09:14 +0000 Subject: [PATCH 44/48] Ignore _typing.py coverage --- setup.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ba580f2c9..3e2fee46a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,7 +15,9 @@ markers = data_file = .reports/coverage/data branch = true relative_files = true -omit = beets/test/* +omit = + beets/test/* + beetsplug/_typing.py [coverage:report] precision = 2 From 5c8f1c1ee578982c1b334eb5e0f735afd28c7066 Mon Sep 17 00:00:00 2001 From: Pierre Ayoub Date: Thu, 20 Feb 2025 17:23:14 +0100 Subject: [PATCH 45/48] Fix convert plugin attempting to process a non-media file (#5261) ## Description My library is managed using Beets for organization and [git-annex](https://git-annex.branchable.com/) as storage backend. Therefore when using this system, while my library files always exists on my filesystem, some files may be empty (without content). In this case, when I'm running the `convert` plugin, I don't wants it to process files which are empty (same apply for any Beets plugin). Hence, I added a check that the file is readable as a `MediaFile` before doing any process. Before this fix, trying to encode an empty file would have lead to an error while leaving `convert` doing its side-effects **and** `convert` would also copy empty files to destination for files that doesn't need to be re-encoded. 
In my case the unreadable files are empty, but depending on the storage
backend the problem could just as well be corrupted files or anything else.

In conclusion, I think **checking that the file is readable is always
recommended** before proceeding to a heavy operation like this.
---
 beetsplug/convert.py | 10 ++++++++++
 docs/changelog.rst   |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/beetsplug/convert.py b/beetsplug/convert.py
index 536acf16e..0c50fc73f 100644
--- a/beetsplug/convert.py
+++ b/beetsplug/convert.py
@@ -22,6 +22,7 @@ import tempfile
 import threading
 from string import Template
 
+import mediafile
 from confuse import ConfigTypeError, Optional
 
 from beets import art, config, plugins, ui, util
@@ -351,6 +352,15 @@ class ConvertPlugin(BeetsPlugin):
             item = yield (item, original, converted)
             dest = item.destination(basedir=dest_dir, path_formats=path_formats)
 
+            # Ensure that desired item is readable before processing it. Needed
+            # to avoid any side-effect of the conversion (linking, keep_new,
+            # refresh) if we already know that it will fail.
+            try:
+                mediafile.MediaFile(util.syspath(item.path))
+            except mediafile.UnreadableFileError as exc:
+                self._log.error("Could not open file to convert: {0}", exc)
+                continue
+
             # When keeping the new file in the library, we first move the
             # current (pristine) file to the destination. We'll then copy it
             # back to its old path or transcode it to a new path.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 43cdd1255..c5b26c2db 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -519,6 +519,8 @@ Bug fixes:
   `requests` timeout.
 * Fix cover art resizing logic to support multiple steps of resizing
   :bug:`5151`
+* :doc:`/plugins/convert`: Fix attempt to convert and perform side-effects if
+  library file is not readable.
 
 For plugin developers:

From 5c1817c780eb8886d359c73ddd79206adb42412e Mon Sep 17 00:00:00 2001
From: Jack Wilsdon
Date: Thu, 27 Feb 2025 00:54:57 +0000
Subject: [PATCH 46/48] Fix IMBackend#compare on ImageMagick 7.1.1-44

---
 beets/util/artresizer.py      |  5 +++++
 docs/changelog.rst            |  1 +
 test/plugins/test_embedart.py | 18 +++++++++---------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/beets/util/artresizer.py b/beets/util/artresizer.py
index 09cc29e0d..ffbc2edba 100644
--- a/beets/util/artresizer.py
+++ b/beets/util/artresizer.py
@@ -314,6 +314,11 @@ class IMBackend(LocalBackend):
         else:
             out_str = stdout
 
+        # ImageMagick 7.1.1-44 outputs in a different format.
+        if b"(" in out_str and out_str.endswith(b")"):
+            # Extract diff from "... (diff)".
+            out_str = out_str[out_str.index(b"(") + 1 : -1]
+
         try:
             phash_diff = float(out_str)
         except ValueError:
diff --git a/docs/changelog.rst b/docs/changelog.rst
index c5b26c2db..ae2447081 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -75,6 +75,7 @@ Bug fixes:
 * :doc:`plugins/lyrics`: Fix plugin crash when ``genius`` backend returns empty
   lyrics.
   :bug:`5583`
+* ImageMagick 7.1.1-44 is now supported.
For packagers:

diff --git a/test/plugins/test_embedart.py b/test/plugins/test_embedart.py
index 14bfdf522..2d2d68153 100644
--- a/test/plugins/test_embedart.py
+++ b/test/plugins/test_embedart.py
@@ -285,8 +285,8 @@ class ArtSimilarityTest(unittest.TestCase):
         mock_extract,
         mock_subprocess,
         compare_status=0,
-        compare_stdout="",
-        compare_stderr="",
+        compare_stdout=b"",
+        compare_stderr=b"",
         convert_status=0,
     ):
         mock_extract.return_value = b"extracted_path"
@@ -298,33 +298,33 @@
         ]
 
     def test_compare_success_similar(self, mock_extract, mock_subprocess):
-        self._mock_popens(mock_extract, mock_subprocess, 0, "10", "err")
+        self._mock_popens(mock_extract, mock_subprocess, 0, b"10", b"err")
         assert self._similarity(20)
 
     def test_compare_success_different(self, mock_extract, mock_subprocess):
-        self._mock_popens(mock_extract, mock_subprocess, 0, "10", "err")
+        self._mock_popens(mock_extract, mock_subprocess, 0, b"10", b"err")
         assert not self._similarity(5)
 
     def test_compare_status1_similar(self, mock_extract, mock_subprocess):
-        self._mock_popens(mock_extract, mock_subprocess, 1, "out", "10")
+        self._mock_popens(mock_extract, mock_subprocess, 1, b"out", b"10")
         assert self._similarity(20)
 
     def test_compare_status1_different(self, mock_extract, mock_subprocess):
-        self._mock_popens(mock_extract, mock_subprocess, 1, "out", "10")
+        self._mock_popens(mock_extract, mock_subprocess, 1, b"out", b"10")
         assert not self._similarity(5)
 
     def test_compare_failed(self, mock_extract, mock_subprocess):
-        self._mock_popens(mock_extract, mock_subprocess, 2, "out", "10")
+        self._mock_popens(mock_extract, mock_subprocess, 2, b"out", b"10")
         assert self._similarity(20) is None
 
     def test_compare_parsing_error(self, mock_extract, mock_subprocess):
-        self._mock_popens(mock_extract, mock_subprocess, 0, "foo", "bar")
+        self._mock_popens(mock_extract, mock_subprocess, 0, b"foo", b"bar")
         assert self._similarity(20) is None
 
     def test_compare_parsing_error_and_failure(
         self, mock_extract, mock_subprocess
     ):
-        self._mock_popens(mock_extract, mock_subprocess, 1, "foo", "bar")
+        self._mock_popens(mock_extract, mock_subprocess, 1, b"foo", b"bar")
         assert self._similarity(20) is None
 
     def test_convert_failure(self, mock_extract, mock_subprocess):

From b7521f9a0bbd13db13a0e2543814c5e6564786e1 Mon Sep 17 00:00:00 2001
From: Allen <64094914+allendema@users.noreply.github.com>
Date: Wed, 12 Mar 2025 08:08:53 +0100
Subject: [PATCH 47/48] fix: plugins/listenbrainz: Fix UnboundLocalError in
 cases where 'mbid' is not defined (#5651)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix occurrence of `UnboundLocalError` in the listenbrainz plugin's
`get_tracks_from_listens()` when `mbid` is not available.

Removed a print statement.

Fix link to config.yaml.

Fix link to Listenbrainz "get the token" documentation.
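
For clarity, a condensed sketch of the failure mode (the names are simplified
stand-ins for the plugin code, and `lookup_recording` is hypothetical; compare
the one-line `mbid = None` fix in the diff below):

```python
def lookup_recording() -> str:
    # Stand-in for the MusicBrainz title/release search in the plugin.
    return "hypothetical-recording-mbid"


def resolve_mbid(mbid_mapping: dict) -> str | None:
    mbid = None  # the fix: make sure `mbid` is always bound
    if mbid_mapping.get("recording_mbid") is None:
        # Previously `mbid` was assigned only on this branch, so returning
        # it when recording_mbid was present raised UnboundLocalError.
        mbid = lookup_recording()
    return mbid
```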
Co-authored-by: Šarūnas Nejus
---
 beetsplug/listenbrainz.py     | 2 +-
 docs/changelog.rst            | 2 ++
 docs/plugins/listenbrainz.rst | 7 ++++---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/beetsplug/listenbrainz.py b/beetsplug/listenbrainz.py
index 1e2912793..37a7920b9 100644
--- a/beetsplug/listenbrainz.py
+++ b/beetsplug/listenbrainz.py
@@ -110,7 +110,7 @@ class ListenBrainzPlugin(BeetsPlugin):
             if track["track_metadata"].get("release_name") is None:
                 continue
             mbid_mapping = track["track_metadata"].get("mbid_mapping", {})
-            # print(json.dumps(track, indent=4, sort_keys=True))
+            mbid = None
             if mbid_mapping.get("recording_mbid") is None:
                 # search for the track using title and release
                 mbid = self.get_mb_recording_id(track)
diff --git a/docs/changelog.rst b/docs/changelog.rst
index ae2447081..88d87e32f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -24,6 +24,8 @@ New features:
 
 Bug fixes:
 
+* :doc:`plugins/listenbrainz`: Fix rST formatting for URLs of Listenbrainz API Key documentation and config.yaml.
+* :doc:`plugins/listenbrainz`: Fix ``UnboundLocalError`` in cases where 'mbid' is not defined.
 * :doc:`plugins/fetchart`: Fix fetchart bug where a tempfile
   could not be deleted due to never being properly closed.
   :bug:`5521`
diff --git a/docs/plugins/listenbrainz.rst b/docs/plugins/listenbrainz.rst
index 1be15ae67..037ccd685 100644
--- a/docs/plugins/listenbrainz.rst
+++ b/docs/plugins/listenbrainz.rst
@@ -8,14 +8,14 @@ The ListenBrainz plugin for beets allows you to interact with the ListenBrainz s
 
 Installation
 ------------
 
-To enable the ListenBrainz plugin, add the following to your beets configuration file (`config.yaml`):
+To enable the ListenBrainz plugin, add the following to your beets configuration file (`config.yaml`_):
 
 .. code-block:: yaml
 
     plugins:
      - listenbrainz
 
-You can then configure the plugin by providing your Listenbrainz token (see intructions `here`_`)and username::
+You can then configure the plugin by providing your Listenbrainz token (see instructions `here`_) and username::
 
     listenbrainz:
         token: TOKEN
@@ -28,4 +28,5 @@
 
 Usage
 -----
 
 Once the plugin is enabled, you can import the listening history using the `lbimport` command in beets.
 
-.. _here: https://listenbrainz.readthedocs.io/en/latest/users/api/index.html#get-the-user-token
\ No newline at end of file
+.. _here: https://listenbrainz.readthedocs.io/en/latest/users/api/index.html#get-the-user-token
+.. _config.yaml: ../reference/config.rst

From 670a3bcd17a46883c71cf07dd313fcd0dff4be9d Mon Sep 17 00:00:00 2001
From: Bob Cotton
Date: Wed, 12 Mar 2025 01:14:38 -0600
Subject: [PATCH 48/48] Add beets-id3extract to community plugins (#5660)

Add link to community plugin beets-id3extract
---
 docs/plugins/index.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst
index 6705344c9..b7998ef19 100644
--- a/docs/plugins/index.rst
+++ b/docs/plugins/index.rst
@@ -478,6 +478,9 @@ Here are a few of the plugins written by the beets community:
 `beets-ibroadcast`_
     Uploads tracks to the `iBroadcast`_ cloud service.
 
+`beets-id3extract`_
+    Maps arbitrary ID3 tags to beets custom fields.
+
 `beets-importreplace`_
     Lets you perform regex replacements on incoming metadata.
@@ -560,6 +563,7 @@
 .. _beets-follow: https://github.com/nolsto/beets-follow
 .. _beets-ibroadcast: https://github.com/ctrueden/beets-ibroadcast
 .. _iBroadcast: https://ibroadcast.com/
+.. _beets-id3extract: https://github.com/bcotton/beets-id3extract
 .. 
_beets-importreplace: https://github.com/edgars-supe/beets-importreplace .. _beets-setlister: https://github.com/tomjaspers/beets-setlister .. _beets-noimport: https://gitlab.com/tiago.dias/beets-noimport