Fix legalize_path types

Background The `_legalize_stage` function was causing issues with Mypy due to inconsistent type usage between the `path` and `extension` parameters. This inconsistency stemmed from the `fragment` parameter influencing the types of these variables. Key issues 1. `path` was defined as `str`, while `extension` was `bytes`. 2. Depending on `fragment`, `extension` could be either `str` or `bytes`. 3. `path` was sometimes converted to `bytes` within `_legalize_stage`. Item.destination` method - The `fragment` parameter determined the output format: - `False`: Returned absolute path as bytes (default) - `True`: Returned path relative to library directory as str Thus - Rename `fragment` parameter to `relative_to_libdir` for clarity - Ensure `Item.destination` returns `bytes` in all cases - Code expecting strings now converts the output to `str` - Use only `str` type in `_legalize_stage` and `_legalize_path` functions - These functions are no longer dependent on `relative_to_libdir`
2026-02-10 17:34:05 +01:00 · 2024-09-14 11:38:40 +01:00 · 2024-09-14 11:38:40 +01:00 · 52951bf719
commit 52951bf719
parent ecdff785f7
6 changed files with 58 additions and 89 deletions
--- a/beets/library.py
+++ b/beets/library.py
@ -1075,19 +1075,17 @@ class Item(LibModel):

    def destination(
        self,
-        fragment=False,
+        relative_to_libdir=False,
        basedir=None,
        platform=None,
        path_formats=None,
        replacements=None,
-    ):
-        """Return the path in the library directory designated for the
-        item (i.e., where the file ought to be).
+    ) -> bytes:
+        """Return the path in the library directory designated for the item
+        (i.e., where the file ought to be).

-        fragment makes this method return just the path fragment underneath
-        the root library directory; the path is also returned as Unicode
-        instead of encoded as a bytestring. basedir can override the library's
-        base directory for the destination.
+        The path is returned as a bytestring. ``basedir`` can override the
+        library's base directory for the destination.
        """
        db = self._check_db()
        platform = platform or sys.platform
@ -1137,14 +1135,13 @@ class Item(LibModel):
            # When zero, try to determine from filesystem.
            maxlen = util.max_filename_length(db.directory)

-        subpath, fellback = util.legalize_path(
+        lib_path_str, fallback = util.legalize_path(
            subpath,
            replacements,
            maxlen,
            os.path.splitext(self.path)[1],
-            fragment,
        )
-        if fellback:
+        if fallback:
            # Print an error message if legalization fell back to
            # default replacements because of the maximum length.
            log.warning(
@ -1153,11 +1150,12 @@ class Item(LibModel):
                "the filename.",
                subpath,
            )
+        lib_path_bytes = util.bytestring_path(lib_path_str)

-        if fragment:
-            return util.as_string(subpath)
-        else:
-            return normpath(os.path.join(basedir, subpath))
+        if relative_to_libdir:
+            return lib_path_bytes
+
+        return normpath(os.path.join(basedir, lib_path_bytes))


 class Album(LibModel):
--- a/beets/util/init.py
+++ b/beets/util/init.py
@ -712,25 +712,18 @@ def truncate_path(path: AnyStr, length: int = MAX_FILENAME_LENGTH) -> AnyStr:


 def _legalize_stage(
-    path: str,
-    replacements: Replacements | None,
-    length: int,
-    extension: str,
-    fragment: bool,
-) -> tuple[BytesOrStr, bool]:
+    path: str, replacements: Replacements | None, length: int, extension: str
+) -> tuple[str, bool]:
    """Perform a single round of path legalization steps
-    (sanitation/replacement, encoding from Unicode to bytes,
-    extension-appending, and truncation). Return the path (Unicode if
-    `fragment` is set, `bytes` otherwise) and whether truncation was
-    required.
+    1. sanitation/replacement
+    2. appending the extension
+    3. truncation.
+
+    Return the path and whether truncation was required.
    """
    # Perform an initial sanitization including user replacements.
    path = sanitize_path(path, replacements)

-    # Encode for the filesystem.
-    if not fragment:
-        path = bytestring_path(path)  # type: ignore
-
    # Preserve extension.
    path += extension.lower()

@ -742,57 +735,41 @@ def _legalize_stage(


 def legalize_path(
-    path: str,
-    replacements: Replacements | None,
-    length: int,
-    extension: bytes,
-    fragment: bool,
-) -> tuple[BytesOrStr, bool]:
-    """Given a path-like Unicode string, produce a legal path. Return
-    the path and a flag indicating whether some replacements had to be
-    ignored (see below).
+    path: str, replacements: Replacements | None, length: int, extension: str
+) -> tuple[str, bool]:
+    """Given a path-like Unicode string, produce a legal path. Return the path
+    and a flag indicating whether some replacements had to be ignored (see
+    below).

-    The legalization process (see `_legalize_stage`) consists of
-    applying the sanitation rules in `replacements`, encoding the string
-    to bytes (unless `fragment` is set), truncating components to
-    `length`, appending the `extension`.
+    This function uses `_legalize_stage` function to legalize the path, see its
+    documentation for the details of what this involves. It is called up to
+    three times in case truncation conflicts with replacements (as can happen
+    when truncation creates whitespace at the end of the string, for example).

-    This function performs up to three calls to `_legalize_stage` in
-    case truncation conflicts with replacements (as can happen when
-    truncation creates whitespace at the end of the string, for
-    example). The limited number of iterations iterations avoids the
-    possibility of an infinite loop of sanitation and truncation
-    operations, which could be caused by replacement rules that make the
-    string longer. The flag returned from this function indicates that
-    the path has to be truncated twice (indicating that replacements
-    made the string longer again after it was truncated); the
-    application should probably log some sort of warning.
+    The limited number of iterations avoids the possibility of an infinite loop
+    of sanitation and truncation operations, which could be caused by
+    replacement rules that make the string longer.
+
+    The flag returned from this function indicates that the path has to be
+    truncated twice (indicating that replacements made the string longer again
+    after it was truncated); the application should probably log some sort of
+    warning.
    """
+    args = length, as_string(extension)

-    if fragment:
-        # Outputting Unicode.
-        extension = extension.decode("utf-8", "ignore")
-
-    first_stage_path, _ = _legalize_stage(
-        path, replacements, length, extension, fragment
+    first_stage, _ = os.path.splitext(
+        _legalize_stage(path, replacements, *args)[0]
    )

-    # Convert back to Unicode with extension removed.
-    first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path))
-
    # Re-sanitize following truncation (including user replacements).
-    second_stage_path, retruncated = _legalize_stage(
-        first_stage_path, replacements, length, extension, fragment
-    )
+    second_stage, truncated = _legalize_stage(first_stage, replacements, *args)

-    # If the path was once again truncated, discard user replacements
+    if not truncated:
+        return second_stage, False
+
+    # If the path was truncated, discard user replacements
    # and run through one last legalization stage.
-    if retruncated:
-        second_stage_path, _ = _legalize_stage(
-            first_stage_path, None, length, extension, fragment
-        )
-
-    return second_stage_path, retruncated
+    return _legalize_stage(first_stage, None, *args)[0], True


 def str2bool(value: str) -> bool:
--- a/beets/vfs.py
+++ b/beets/vfs.py
@ -49,7 +49,7 @@ def libtree(lib):
    """
    root = Node({}, {})
    for item in lib.items():
-        dest = item.destination(fragment=True)
-        parts = util.components(dest)
+        dest = item.destination(relative_to_libdir=True)
+        parts = util.components(util.as_string(dest))
        _insert(root, parts, item.id)
    return root
--- a/beetsplug/bpd/init.py
+++ b/beetsplug/bpd/init.py
@ -33,7 +33,7 @@ import beets.ui
 from beets import dbcore, vfs
 from beets.library import Item
 from beets.plugins import BeetsPlugin
-from beets.util import bluelet
+from beets.util import as_string, bluelet

 if TYPE_CHECKING:
    from beets.dbcore.query import Query
@ -1130,7 +1130,7 @@ class Server(BaseServer):

    def _item_info(self, item):
        info_lines = [
-            "file: " + item.destination(fragment=True),
+            "file: " + as_string(item.destination(relative_to_libdir=True)),
            "Time: " + str(int(item.length)),
            "duration: " + f"{item.length:.3f}",
            "Id: " + str(item.id),
--- a/beetsplug/convert.py
+++ b/beetsplug/convert.py
@ -624,13 +624,7 @@ class ConvertPlugin(BeetsPlugin):
            # strings we get from item.destination to bytes.
            items_paths = [
                os.path.relpath(
-                    util.bytestring_path(
-                        item.destination(
-                            basedir=dest,
-                            path_formats=path_formats,
-                            fragment=False,
-                        )
-                    ),
+                    item.destination(basedir=dest, path_formats=path_formats),
                    pl_dir,
                )
                for item in items
--- a/test/test_library.py
+++ b/test/test_library.py
@ -34,7 +34,7 @@ from beets.library import Album
 from beets.test import _common
 from beets.test._common import item
 from beets.test.helper import BeetsTestCase, ItemInDBTestCase
-from beets.util import bytestring_path, syspath
+from beets.util import as_string, bytestring_path, syspath

 # Shortcut to path normalization.
 np = util.normpath
@ -411,14 +411,14 @@ class DestinationTest(BeetsTestCase):
    def test_unicode_normalized_nfd_on_mac(self):
        instr = unicodedata.normalize("NFC", "caf\xe9")
        self.lib.path_formats = [("default", instr)]
-        dest = self.i.destination(platform="darwin", fragment=True)
-        assert dest == unicodedata.normalize("NFD", instr)
+        dest = self.i.destination(platform="darwin", relative_to_libdir=True)
+        assert as_string(dest) == unicodedata.normalize("NFD", instr)

    def test_unicode_normalized_nfc_on_linux(self):
        instr = unicodedata.normalize("NFD", "caf\xe9")
        self.lib.path_formats = [("default", instr)]
-        dest = self.i.destination(platform="linux", fragment=True)
-        assert dest == unicodedata.normalize("NFC", instr)
+        dest = self.i.destination(platform="linux", relative_to_libdir=True)
+        assert as_string(dest) == unicodedata.normalize("NFC", instr)

    def test_non_mbcs_characters_on_windows(self):
        oldfunc = sys.getfilesystemencoding
@ -436,8 +436,8 @@ class DestinationTest(BeetsTestCase):
    def test_unicode_extension_in_fragment(self):
        self.lib.path_formats = [("default", "foo")]
        self.i.path = util.bytestring_path("bar.caf\xe9")
-        dest = self.i.destination(platform="linux", fragment=True)
-        assert dest == "foo.caf\xe9"
+        dest = self.i.destination(platform="linux", relative_to_libdir=True)
+        assert as_string(dest) == "foo.caf\xe9"

    def test_asciify_and_replace(self):
        config["asciify_paths"] = True