From 52951bf7195fe3130dee92dbd12d6a959e215d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 14 Sep 2024 11:38:40 +0100 Subject: [PATCH 1/9] Fix legalize_path types Background The `_legalize_stage` function was causing issues with Mypy due to inconsistent type usage between the `path` and `extension` parameters. This inconsistency stemmed from the `fragment` parameter influencing the types of these variables. Key issues 1. `path` was defined as `str`, while `extension` was `bytes`. 2. Depending on `fragment`, `extension` could be either `str` or `bytes`. 3. `path` was sometimes converted to `bytes` within `_legalize_stage`. Item.destination` method - The `fragment` parameter determined the output format: - `False`: Returned absolute path as bytes (default) - `True`: Returned path relative to library directory as str Thus - Rename `fragment` parameter to `relative_to_libdir` for clarity - Ensure `Item.destination` returns `bytes` in all cases - Code expecting strings now converts the output to `str` - Use only `str` type in `_legalize_stage` and `_legalize_path` functions - These functions are no longer dependent on `relative_to_libdir` --- beets/library.py | 28 ++++++------ beets/util/__init__.py | 89 +++++++++++++++------------------------ beets/vfs.py | 4 +- beetsplug/bpd/__init__.py | 4 +- beetsplug/convert.py | 8 +--- test/test_library.py | 14 +++--- 6 files changed, 58 insertions(+), 89 deletions(-) diff --git a/beets/library.py b/beets/library.py index d4ec63200..2f5f31393 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1075,19 +1075,17 @@ class Item(LibModel): def destination( self, - fragment=False, + relative_to_libdir=False, basedir=None, platform=None, path_formats=None, replacements=None, - ): - """Return the path in the library directory designated for the - item (i.e., where the file ought to be). + ) -> bytes: + """Return the path in the library directory designated for the item + (i.e., where the file ought to be). - fragment makes this method return just the path fragment underneath - the root library directory; the path is also returned as Unicode - instead of encoded as a bytestring. basedir can override the library's - base directory for the destination. + The path is returned as a bytestring. ``basedir`` can override the + library's base directory for the destination. """ db = self._check_db() platform = platform or sys.platform @@ -1137,14 +1135,13 @@ class Item(LibModel): # When zero, try to determine from filesystem. maxlen = util.max_filename_length(db.directory) - subpath, fellback = util.legalize_path( + lib_path_str, fallback = util.legalize_path( subpath, replacements, maxlen, os.path.splitext(self.path)[1], - fragment, ) - if fellback: + if fallback: # Print an error message if legalization fell back to # default replacements because of the maximum length. log.warning( @@ -1153,11 +1150,12 @@ class Item(LibModel): "the filename.", subpath, ) + lib_path_bytes = util.bytestring_path(lib_path_str) - if fragment: - return util.as_string(subpath) - else: - return normpath(os.path.join(basedir, subpath)) + if relative_to_libdir: + return lib_path_bytes + + return normpath(os.path.join(basedir, lib_path_bytes)) class Album(LibModel): diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 68dbaee65..dc2bb6089 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -712,25 +712,18 @@ def truncate_path(path: AnyStr, length: int = MAX_FILENAME_LENGTH) -> AnyStr: def _legalize_stage( - path: str, - replacements: Replacements | None, - length: int, - extension: str, - fragment: bool, -) -> tuple[BytesOrStr, bool]: + path: str, replacements: Replacements | None, length: int, extension: str +) -> tuple[str, bool]: """Perform a single round of path legalization steps - (sanitation/replacement, encoding from Unicode to bytes, - extension-appending, and truncation). Return the path (Unicode if - `fragment` is set, `bytes` otherwise) and whether truncation was - required. + 1. sanitation/replacement + 2. appending the extension + 3. truncation. + + Return the path and whether truncation was required. """ # Perform an initial sanitization including user replacements. path = sanitize_path(path, replacements) - # Encode for the filesystem. - if not fragment: - path = bytestring_path(path) # type: ignore - # Preserve extension. path += extension.lower() @@ -742,57 +735,41 @@ def _legalize_stage( def legalize_path( - path: str, - replacements: Replacements | None, - length: int, - extension: bytes, - fragment: bool, -) -> tuple[BytesOrStr, bool]: - """Given a path-like Unicode string, produce a legal path. Return - the path and a flag indicating whether some replacements had to be - ignored (see below). + path: str, replacements: Replacements | None, length: int, extension: str +) -> tuple[str, bool]: + """Given a path-like Unicode string, produce a legal path. Return the path + and a flag indicating whether some replacements had to be ignored (see + below). - The legalization process (see `_legalize_stage`) consists of - applying the sanitation rules in `replacements`, encoding the string - to bytes (unless `fragment` is set), truncating components to - `length`, appending the `extension`. + This function uses `_legalize_stage` function to legalize the path, see its + documentation for the details of what this involves. It is called up to + three times in case truncation conflicts with replacements (as can happen + when truncation creates whitespace at the end of the string, for example). - This function performs up to three calls to `_legalize_stage` in - case truncation conflicts with replacements (as can happen when - truncation creates whitespace at the end of the string, for - example). The limited number of iterations iterations avoids the - possibility of an infinite loop of sanitation and truncation - operations, which could be caused by replacement rules that make the - string longer. The flag returned from this function indicates that - the path has to be truncated twice (indicating that replacements - made the string longer again after it was truncated); the - application should probably log some sort of warning. + The limited number of iterations avoids the possibility of an infinite loop + of sanitation and truncation operations, which could be caused by + replacement rules that make the string longer. + + The flag returned from this function indicates that the path has to be + truncated twice (indicating that replacements made the string longer again + after it was truncated); the application should probably log some sort of + warning. """ + args = length, as_string(extension) - if fragment: - # Outputting Unicode. - extension = extension.decode("utf-8", "ignore") - - first_stage_path, _ = _legalize_stage( - path, replacements, length, extension, fragment + first_stage, _ = os.path.splitext( + _legalize_stage(path, replacements, *args)[0] ) - # Convert back to Unicode with extension removed. - first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path)) - # Re-sanitize following truncation (including user replacements). - second_stage_path, retruncated = _legalize_stage( - first_stage_path, replacements, length, extension, fragment - ) + second_stage, truncated = _legalize_stage(first_stage, replacements, *args) - # If the path was once again truncated, discard user replacements + if not truncated: + return second_stage, False + + # If the path was truncated, discard user replacements # and run through one last legalization stage. - if retruncated: - second_stage_path, _ = _legalize_stage( - first_stage_path, None, length, extension, fragment - ) - - return second_stage_path, retruncated + return _legalize_stage(first_stage, None, *args)[0], True def str2bool(value: str) -> bool: diff --git a/beets/vfs.py b/beets/vfs.py index cdbf197a6..4fd133f5a 100644 --- a/beets/vfs.py +++ b/beets/vfs.py @@ -49,7 +49,7 @@ def libtree(lib): """ root = Node({}, {}) for item in lib.items(): - dest = item.destination(fragment=True) - parts = util.components(dest) + dest = item.destination(relative_to_libdir=True) + parts = util.components(util.as_string(dest)) _insert(root, parts, item.id) return root diff --git a/beetsplug/bpd/__init__.py b/beetsplug/bpd/__init__.py index 9d8b4142b..435368e35 100644 --- a/beetsplug/bpd/__init__.py +++ b/beetsplug/bpd/__init__.py @@ -33,7 +33,7 @@ import beets.ui from beets import dbcore, vfs from beets.library import Item from beets.plugins import BeetsPlugin -from beets.util import bluelet +from beets.util import as_string, bluelet if TYPE_CHECKING: from beets.dbcore.query import Query @@ -1130,7 +1130,7 @@ class Server(BaseServer): def _item_info(self, item): info_lines = [ - "file: " + item.destination(fragment=True), + "file: " + as_string(item.destination(relative_to_libdir=True)), "Time: " + str(int(item.length)), "duration: " + f"{item.length:.3f}", "Id: " + str(item.id), diff --git a/beetsplug/convert.py b/beetsplug/convert.py index a8c32e955..7586c2a1b 100644 --- a/beetsplug/convert.py +++ b/beetsplug/convert.py @@ -624,13 +624,7 @@ class ConvertPlugin(BeetsPlugin): # strings we get from item.destination to bytes. items_paths = [ os.path.relpath( - util.bytestring_path( - item.destination( - basedir=dest, - path_formats=path_formats, - fragment=False, - ) - ), + item.destination(basedir=dest, path_formats=path_formats), pl_dir, ) for item in items diff --git a/test/test_library.py b/test/test_library.py index b5e6d4eeb..d6804449f 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -34,7 +34,7 @@ from beets.library import Album from beets.test import _common from beets.test._common import item from beets.test.helper import BeetsTestCase, ItemInDBTestCase -from beets.util import bytestring_path, syspath +from beets.util import as_string, bytestring_path, syspath # Shortcut to path normalization. np = util.normpath @@ -411,14 +411,14 @@ class DestinationTest(BeetsTestCase): def test_unicode_normalized_nfd_on_mac(self): instr = unicodedata.normalize("NFC", "caf\xe9") self.lib.path_formats = [("default", instr)] - dest = self.i.destination(platform="darwin", fragment=True) - assert dest == unicodedata.normalize("NFD", instr) + dest = self.i.destination(platform="darwin", relative_to_libdir=True) + assert as_string(dest) == unicodedata.normalize("NFD", instr) def test_unicode_normalized_nfc_on_linux(self): instr = unicodedata.normalize("NFD", "caf\xe9") self.lib.path_formats = [("default", instr)] - dest = self.i.destination(platform="linux", fragment=True) - assert dest == unicodedata.normalize("NFC", instr) + dest = self.i.destination(platform="linux", relative_to_libdir=True) + assert as_string(dest) == unicodedata.normalize("NFC", instr) def test_non_mbcs_characters_on_windows(self): oldfunc = sys.getfilesystemencoding @@ -436,8 +436,8 @@ class DestinationTest(BeetsTestCase): def test_unicode_extension_in_fragment(self): self.lib.path_formats = [("default", "foo")] self.i.path = util.bytestring_path("bar.caf\xe9") - dest = self.i.destination(platform="linux", fragment=True) - assert dest == "foo.caf\xe9" + dest = self.i.destination(platform="linux", relative_to_libdir=True) + assert as_string(dest) == "foo.caf\xe9" def test_asciify_and_replace(self): config["asciify_paths"] = True From 06dd2738bb3dbca40dfe2b2a80786d594cddced2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 15 Sep 2024 03:13:42 +0100 Subject: [PATCH 2/9] Remove never used replacements paramater from item.destination --- beets/library.py | 8 +------- test/test_library.py | 11 ----------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/beets/library.py b/beets/library.py index 2f5f31393..f45e63ebe 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1079,7 +1079,6 @@ class Item(LibModel): basedir=None, platform=None, path_formats=None, - replacements=None, ) -> bytes: """Return the path in the library directory designated for the item (i.e., where the file ought to be). @@ -1091,8 +1090,6 @@ class Item(LibModel): platform = platform or sys.platform basedir = basedir or db.directory path_formats = path_formats or db.path_formats - if replacements is None: - replacements = self._db.replacements # Use a path format based on a query, falling back on the # default. @@ -1136,10 +1133,7 @@ class Item(LibModel): maxlen = util.max_filename_length(db.directory) lib_path_str, fallback = util.legalize_path( - subpath, - replacements, - maxlen, - os.path.splitext(self.path)[1], + subpath, db.replacements, maxlen, os.path.splitext(self.path)[1] ) if fallback: # Print an error message if legalization fell back to diff --git a/test/test_library.py b/test/test_library.py index d6804449f..342c2fe20 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -462,17 +462,6 @@ class DestinationTest(BeetsTestCase): self.i.album = "bar" assert self.i.destination() == np("base/ber/foo") - def test_destination_with_replacements_argument(self): - self.lib.directory = b"base" - self.lib.replacements = [(re.compile(r"a"), "f")] - self.lib.path_formats = [("default", "$album/$title")] - self.i.title = "foo" - self.i.album = "bar" - replacements = [(re.compile(r"a"), "e")] - assert self.i.destination(replacements=replacements) == np( - "base/ber/foo" - ) - @unittest.skip("unimplemented: #359") def test_destination_with_empty_component(self): self.lib.directory = b"base" From d3186859acfa1cbe771eb43a9e9d720f8066590a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 15 Sep 2024 03:15:14 +0100 Subject: [PATCH 3/9] Remove unused platform parameter in item.destination --- beets/library.py | 4 +--- test/test_library.py | 10 +++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/beets/library.py b/beets/library.py index f45e63ebe..ec530a5dd 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1077,7 +1077,6 @@ class Item(LibModel): self, relative_to_libdir=False, basedir=None, - platform=None, path_formats=None, ) -> bytes: """Return the path in the library directory designated for the item @@ -1087,7 +1086,6 @@ class Item(LibModel): library's base directory for the destination. """ db = self._check_db() - platform = platform or sys.platform basedir = basedir or db.directory path_formats = path_formats or db.path_formats @@ -1117,7 +1115,7 @@ class Item(LibModel): subpath = self.evaluate_template(subpath_tmpl, True) # Prepare path for output: normalize Unicode characters. - if platform == "darwin": + if sys.platform == "darwin": subpath = unicodedata.normalize("NFD", subpath) else: subpath = unicodedata.normalize("NFC", subpath) diff --git a/test/test_library.py b/test/test_library.py index 342c2fe20..a4e6dab44 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -23,6 +23,7 @@ import sys import time import unicodedata import unittest +from unittest.mock import patch import pytest from mediafile import MediaFile, UnreadableFileError @@ -411,13 +412,15 @@ class DestinationTest(BeetsTestCase): def test_unicode_normalized_nfd_on_mac(self): instr = unicodedata.normalize("NFC", "caf\xe9") self.lib.path_formats = [("default", instr)] - dest = self.i.destination(platform="darwin", relative_to_libdir=True) + with patch("sys.platform", "darwin"): + dest = self.i.destination(relative_to_libdir=True) assert as_string(dest) == unicodedata.normalize("NFD", instr) def test_unicode_normalized_nfc_on_linux(self): instr = unicodedata.normalize("NFD", "caf\xe9") self.lib.path_formats = [("default", instr)] - dest = self.i.destination(platform="linux", relative_to_libdir=True) + with patch("sys.platform", "linux"): + dest = self.i.destination(relative_to_libdir=True) assert as_string(dest) == unicodedata.normalize("NFC", instr) def test_non_mbcs_characters_on_windows(self): @@ -436,7 +439,8 @@ class DestinationTest(BeetsTestCase): def test_unicode_extension_in_fragment(self): self.lib.path_formats = [("default", "foo")] self.i.path = util.bytestring_path("bar.caf\xe9") - dest = self.i.destination(platform="linux", relative_to_libdir=True) + with patch("sys.platform", "linux"): + dest = self.i.destination(relative_to_libdir=True) assert as_string(dest) == "foo.caf\xe9" def test_asciify_and_replace(self): From b3fd84b35651edf2c7b7958a06debec4d3384558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 10 Mar 2025 00:07:21 +0000 Subject: [PATCH 4/9] Move max filename length calculation closer to where it is used --- beets/library.py | 7 +------ beets/util/__init__.py | 30 +++++++++++++++++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/beets/library.py b/beets/library.py index ec530a5dd..1fe253c18 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1125,13 +1125,8 @@ class Item(LibModel): subpath, beets.config["path_sep_replace"].as_str() ) - maxlen = beets.config["max_filename_length"].get(int) - if not maxlen: - # When zero, try to determine from filesystem. - maxlen = util.max_filename_length(db.directory) - lib_path_str, fallback = util.legalize_path( - subpath, db.replacements, maxlen, os.path.splitext(self.path)[1] + subpath, db.replacements, os.path.splitext(self.path)[1] ) if fallback: # Print an error message if legalization fell back to diff --git a/beets/util/__init__.py b/beets/util/__init__.py index dc2bb6089..d8340a978 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -30,6 +30,7 @@ import traceback from collections import Counter from contextlib import suppress from enum import Enum +from functools import cache from importlib import import_module from multiprocessing.pool import ThreadPool from pathlib import Path @@ -47,6 +48,7 @@ from typing import ( from unidecode import unidecode +import beets from beets.util import hidden if TYPE_CHECKING: @@ -694,25 +696,26 @@ def sanitize_path(path: str, replacements: Replacements | None = None) -> str: return os.path.join(*comps) -def truncate_path(path: AnyStr, length: int = MAX_FILENAME_LENGTH) -> AnyStr: +def truncate_path(path: AnyStr) -> AnyStr: """Given a bytestring path or a Unicode path fragment, truncate the components to a legal length. In the last component, the extension is preserved. """ + max_length = get_max_filename_length() comps = components(path) out = [c[:length] for c in comps] base, ext = os.path.splitext(comps[-1]) if ext: # Last component has an extension. - base = base[: length - len(ext)] + base = base[: max_length - len(ext)] out[-1] = base + ext return os.path.join(*out) def _legalize_stage( - path: str, replacements: Replacements | None, length: int, extension: str + path: str, replacements: Replacements | None, extension: str ) -> tuple[str, bool]: """Perform a single round of path legalization steps 1. sanitation/replacement @@ -729,13 +732,13 @@ def _legalize_stage( # Truncate too-long components. pre_truncate_path = path - path = truncate_path(path, length) + path = truncate_path(path) return path, path != pre_truncate_path def legalize_path( - path: str, replacements: Replacements | None, length: int, extension: str + path: str, replacements: Replacements | None, extension: str ) -> tuple[str, bool]: """Given a path-like Unicode string, produce a legal path. Return the path and a flag indicating whether some replacements had to be ignored (see @@ -755,21 +758,21 @@ def legalize_path( after it was truncated); the application should probably log some sort of warning. """ - args = length, as_string(extension) + suffix = as_string(extension) first_stage, _ = os.path.splitext( - _legalize_stage(path, replacements, *args)[0] + _legalize_stage(path, replacements, suffix)[0] ) # Re-sanitize following truncation (including user replacements). - second_stage, truncated = _legalize_stage(first_stage, replacements, *args) + second_stage, truncated = _legalize_stage(first_stage, replacements, suffix) if not truncated: return second_stage, False # If the path was truncated, discard user replacements # and run through one last legalization stage. - return _legalize_stage(first_stage, None, *args)[0], True + return _legalize_stage(first_stage, None, suffix)[0], True def str2bool(value: str) -> bool: @@ -848,16 +851,21 @@ def command_output(cmd: list[BytesOrStr], shell: bool = False) -> CommandOutput: return CommandOutput(stdout, stderr) -def max_filename_length(path: BytesOrStr, limit=MAX_FILENAME_LENGTH) -> int: +@cache +def get_max_filename_length() -> int: """Attempt to determine the maximum filename length for the filesystem containing `path`. If the value is greater than `limit`, then `limit` is used instead (to prevent errors when a filesystem misreports its capacity). If it cannot be determined (e.g., on Windows), return `limit`. """ + if length := beets.config["max_filename_length"].get(int): + return length + + limit = MAX_FILENAME_LENGTH if hasattr(os, "statvfs"): try: - res = os.statvfs(path) + res = os.statvfs(beets.config["directory"].as_str()) except OSError: return limit return min(res[9], limit) From 40fbc8ee7e9941c5bd8487a24370f6b734dcf451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 10 Mar 2025 03:16:37 +0000 Subject: [PATCH 5/9] Fix and simplify path truncation --- beets/util/__init__.py | 32 ++++++++++++++++++-------------- test/test_util.py | 27 ++++++++++++--------------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index d8340a978..ff0a5d273 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -696,22 +696,26 @@ def sanitize_path(path: str, replacements: Replacements | None = None) -> str: return os.path.join(*comps) -def truncate_path(path: AnyStr) -> AnyStr: - """Given a bytestring path or a Unicode path fragment, truncate the - components to a legal length. In the last component, the extension - is preserved. +def truncate_str(s: str, length: int) -> str: + """Truncate the string to the given byte length. + + If we end up truncating a unicode character in the middle (rendering it invalid), + it is removed: + + >>> s = "🎹🎶" # 8 bytes + >>> truncate_str(s, 6) + '🎹' """ + return os.fsencode(s)[:length].decode(sys.getfilesystemencoding(), "ignore") + + +def truncate_path(str_path: str) -> str: + """Truncate each path part to a legal length preserving the extension.""" max_length = get_max_filename_length() - comps = components(path) - - out = [c[:length] for c in comps] - base, ext = os.path.splitext(comps[-1]) - if ext: - # Last component has an extension. - base = base[: max_length - len(ext)] - out[-1] = base + ext - - return os.path.join(*out) + path = Path(str_path) + parent_parts = [truncate_str(p, max_length) for p in path.parts[:-1]] + stem = truncate_str(path.stem, max_length - len(path.suffix)) + return str(Path(*parent_parts, stem).with_suffix(path.suffix)) def _legalize_stage( diff --git a/test/test_util.py b/test/test_util.py index f5b4fd102..a4b224ee3 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -175,18 +175,15 @@ class PathConversionTest(BeetsTestCase): assert outpath == "C:\\caf\xe9".encode() -class PathTruncationTest(BeetsTestCase): - def test_truncate_bytestring(self): - with _common.platform_posix(): - p = util.truncate_path(b"abcde/fgh", 4) - assert p == b"abcd/fgh" - - def test_truncate_unicode(self): - with _common.platform_posix(): - p = util.truncate_path("abcde/fgh", 4) - assert p == "abcd/fgh" - - def test_truncate_preserves_extension(self): - with _common.platform_posix(): - p = util.truncate_path("abcde/fgh.ext", 5) - assert p == "abcde/f.ext" +@patch("beets.util.get_max_filename_length", lambda: 5) +@pytest.mark.parametrize( + "path, expected", + [ + ("abcdeX/fgh", "abcde/fgh"), + ("abcde/fXX.ext", "abcde/f.ext"), + ("a🎹/a.ext", "a🎹/a.ext"), + ("ab🎹/a.ext", "ab/a.ext"), + ], +) +def test_truncate_path(path, expected): + assert util.truncate_path(path) == expected From 4fcb148d602cb8378e26b1e044a43587133c33e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 13 Mar 2025 08:18:31 +0000 Subject: [PATCH 6/9] Add test for legalization logic --- test/test_library.py | 31 --------------- test/test_util.py | 92 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 80 insertions(+), 43 deletions(-) diff --git a/test/test_library.py b/test/test_library.py index a4e6dab44..d90b9efd7 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -487,37 +487,6 @@ class DestinationTest(BeetsTestCase): self.i.path = "foo.mp3" assert self.i.destination() == np("base/one/_.mp3") - def test_legalize_path_one_for_one_replacement(self): - # Use a replacement that should always replace the last X in any - # path component with a Z. - self.lib.replacements = [ - (re.compile(r"X$"), "Z"), - ] - - # Construct an item whose untruncated path ends with a Y but whose - # truncated version ends with an X. - self.i.title = "X" * 300 + "Y" - - # The final path should reflect the replacement. - dest = self.i.destination() - assert dest[-2:] == b"XZ" - - def test_legalize_path_one_for_many_replacement(self): - # Use a replacement that should always replace the last X in any - # path component with four Zs. - self.lib.replacements = [ - (re.compile(r"X$"), "ZZZZ"), - ] - - # Construct an item whose untruncated path ends with a Y but whose - # truncated version ends with an X. - self.i.title = "X" * 300 + "Y" - - # The final path should ignore the user replacement and create a path - # of the correct length, containing Xs. - dest = self.i.destination() - assert dest[-2:] == b"XX" - def test_album_field_query(self): self.lib.directory = b"one" self.lib.path_formats = [("default", "two"), ("flex:foo", "three")] diff --git a/test/test_util.py b/test/test_util.py index a4b224ee3..b82ebed10 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -175,15 +175,83 @@ class PathConversionTest(BeetsTestCase): assert outpath == "C:\\caf\xe9".encode() -@patch("beets.util.get_max_filename_length", lambda: 5) -@pytest.mark.parametrize( - "path, expected", - [ - ("abcdeX/fgh", "abcde/fgh"), - ("abcde/fXX.ext", "abcde/f.ext"), - ("a🎹/a.ext", "a🎹/a.ext"), - ("ab🎹/a.ext", "ab/a.ext"), - ], -) -def test_truncate_path(path, expected): - assert util.truncate_path(path) == expected +class TestPathLegalization: + @pytest.fixture(autouse=True) + def _patch_max_filename_length(self, monkeypatch): + monkeypatch.setattr("beets.util.get_max_filename_length", lambda: 5) + + @pytest.mark.parametrize( + "path, expected", + [ + ("abcdeX/fgh", "abcde/fgh"), + ("abcde/fXX.ext", "abcde/f.ext"), + ("a🎹/a.ext", "a🎹/a.ext"), + ("ab🎹/a.ext", "ab/a.ext"), + ], + ) + def test_truncate(self, path, expected): + assert util.truncate_path(path) == expected + + @pytest.mark.parametrize( + "pre_trunc_repl, post_trunc_repl, expected", + [ + pytest.param( + [], + [], + ("_abcd", False), + id="default", + ), + pytest.param( + [(re.compile(r"abcdX$"), "PRE")], + [], + (":PRE", False), + id="valid path after initial repl", + ), + pytest.param( + [(re.compile(r"abcdX$"), "PRE_LONG")], + [], + (":PRE_", False), + id="too long path after initial repl is truncated", + ), + pytest.param( + [], + [(re.compile(r"abcdX$"), "POST")], + (":POST", False), + id="valid path after post-trunc repl", + ), + pytest.param( + [], + [(re.compile(r"abcdX$"), "POST_LONG")], + (":POST", False), + id="too long path after post-trunc repl is truncated", + ), + pytest.param( + [(re.compile(r"abcdX$"), "PRE")], + [(re.compile(r"PRE$"), "POST")], + (":POST", False), + id="both replacements within filename length limit", + ), + pytest.param( + [(re.compile(r"abcdX$"), "PRE_LONG")], + [(re.compile(r"PRE_$"), "POST")], + (":POST", False), + id="too long initial path is truncated and valid post-trunc repl", + ), + pytest.param( + [(re.compile(r"abcdX$"), "PRE")], + [(re.compile(r"PRE$"), "POST_LONG")], + (":POST", False), + id="valid pre-trunc repl and too long post-trunc path is truncated", + ), + pytest.param( + [(re.compile(r"abcdX$"), "PRE_LONG")], + [(re.compile(r"PRE_$"), "POST_LONG")], + ("_PRE_", True), + id="too long repl both times force default ones to be applied", + ), + ], + ) + def test_replacements(self, pre_trunc_repl, post_trunc_repl, expected): + replacements = pre_trunc_repl + post_trunc_repl + + assert util.legalize_path(":abcdX", replacements, "") == expected From 5826d6b59bd7cc082254080085a61c8ea44b0458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 20 Apr 2025 10:24:57 +0100 Subject: [PATCH 7/9] Remove handling of mbcs encoding This has been phased out in Python 3.6. https://peps.python.org/pep-0529/ --- test/test_library.py | 14 -------------- test/test_util.py | 9 ++------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/test/test_library.py b/test/test_library.py index d90b9efd7..8e5e01f5c 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -19,7 +19,6 @@ import os.path import re import shutil import stat -import sys import time import unicodedata import unittest @@ -423,19 +422,6 @@ class DestinationTest(BeetsTestCase): dest = self.i.destination(relative_to_libdir=True) assert as_string(dest) == unicodedata.normalize("NFC", instr) - def test_non_mbcs_characters_on_windows(self): - oldfunc = sys.getfilesystemencoding - sys.getfilesystemencoding = lambda: "mbcs" - try: - self.i.title = "h\u0259d" - self.lib.path_formats = [("default", "$title")] - p = self.i.destination() - assert b"?" not in p - # We use UTF-8 to encode Windows paths now. - assert "h\u0259d".encode() in p - finally: - sys.getfilesystemencoding = oldfunc - def test_unicode_extension_in_fragment(self): self.lib.path_formats = [("default", "foo")] self.i.path = util.bytestring_path("bar.caf\xe9") diff --git a/test/test_util.py b/test/test_util.py index b82ebed10..28a3a4ce1 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -156,13 +156,8 @@ class PathConversionTest(BeetsTestCase): assert path == outpath def _windows_bytestring_path(self, path): - old_gfse = sys.getfilesystemencoding - sys.getfilesystemencoding = lambda: "mbcs" - try: - with _common.platform_windows(): - return util.bytestring_path(path) - finally: - sys.getfilesystemencoding = old_gfse + with _common.platform_windows(): + return util.bytestring_path(path) def test_bytestring_path_windows_encodes_utf8(self): path = "caf\xe9" From 6a7832f207a51d922380b1ccc23eb85ad692a16c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 20 Apr 2025 15:01:15 +0100 Subject: [PATCH 8/9] Adjust tests to work in Windows --- test/test_library.py | 9 +++++++-- test/test_util.py | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_library.py b/test/test_library.py index 8e5e01f5c..39f1d0b9e 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -559,8 +559,13 @@ class PathFormattingMixin: def _assert_dest(self, dest, i=None): if i is None: i = self.i - with _common.platform_posix(): - actual = i.destination() + + if os.path.sep != "/": + dest = dest.replace(b"/", os.path.sep.encode()) + dest = b"D:" + dest + + actual = i.destination() + assert actual == dest diff --git a/test/test_util.py b/test/test_util.py index 28a3a4ce1..403071df2 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -185,6 +185,9 @@ class TestPathLegalization: ], ) def test_truncate(self, path, expected): + path = path.replace("/", os.path.sep) + expected = expected.replace("/", os.path.sep) + assert util.truncate_path(path) == expected @pytest.mark.parametrize( From 921b7ed9ea572a664accf45462f68101028215d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 27 Apr 2025 14:07:32 +0100 Subject: [PATCH 9/9] Rewrite legalisation tests for readability --- pyproject.toml | 1 + test/test_util.py | 85 +++++++++++++---------------------------------- 2 files changed, 24 insertions(+), 62 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d985c54ea..6819d6624 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -263,6 +263,7 @@ select = [ ] [tool.ruff.lint.per-file-ignores] "beets/**" = ["PT"] +"test/test_util.py" = ["E501"] [tool.ruff.lint.isort] split-on-trailing-comma = false diff --git a/test/test_util.py b/test/test_util.py index 403071df2..3a5e55c49 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -190,66 +190,27 @@ class TestPathLegalization: assert util.truncate_path(path) == expected - @pytest.mark.parametrize( - "pre_trunc_repl, post_trunc_repl, expected", - [ - pytest.param( - [], - [], - ("_abcd", False), - id="default", - ), - pytest.param( - [(re.compile(r"abcdX$"), "PRE")], - [], - (":PRE", False), - id="valid path after initial repl", - ), - pytest.param( - [(re.compile(r"abcdX$"), "PRE_LONG")], - [], - (":PRE_", False), - id="too long path after initial repl is truncated", - ), - pytest.param( - [], - [(re.compile(r"abcdX$"), "POST")], - (":POST", False), - id="valid path after post-trunc repl", - ), - pytest.param( - [], - [(re.compile(r"abcdX$"), "POST_LONG")], - (":POST", False), - id="too long path after post-trunc repl is truncated", - ), - pytest.param( - [(re.compile(r"abcdX$"), "PRE")], - [(re.compile(r"PRE$"), "POST")], - (":POST", False), - id="both replacements within filename length limit", - ), - pytest.param( - [(re.compile(r"abcdX$"), "PRE_LONG")], - [(re.compile(r"PRE_$"), "POST")], - (":POST", False), - id="too long initial path is truncated and valid post-trunc repl", - ), - pytest.param( - [(re.compile(r"abcdX$"), "PRE")], - [(re.compile(r"PRE$"), "POST_LONG")], - (":POST", False), - id="valid pre-trunc repl and too long post-trunc path is truncated", - ), - pytest.param( - [(re.compile(r"abcdX$"), "PRE_LONG")], - [(re.compile(r"PRE_$"), "POST_LONG")], - ("_PRE_", True), - id="too long repl both times force default ones to be applied", - ), - ], - ) - def test_replacements(self, pre_trunc_repl, post_trunc_repl, expected): - replacements = pre_trunc_repl + post_trunc_repl + _p = pytest.param - assert util.legalize_path(":abcdX", replacements, "") == expected + @pytest.mark.parametrize( + "replacements, expected_path, expected_truncated", + [ # [ repl before truncation, repl after truncation ] + _p([ ], "_abcd", False, id="default"), + _p([(r"abcdX$", "1ST"), ], ":1ST", False, id="1st_valid"), + _p([(r"abcdX$", "TOO_LONG"), ], ":TOO_", False, id="1st_truncated"), + _p([(r"abcdX$", "1ST"), (r"1ST$", "2ND") ], ":2ND", False, id="both_valid"), + _p([(r"abcdX$", "TOO_LONG"), (r"TOO_$", "2ND") ], ":2ND", False, id="1st_truncated_2nd_valid"), + _p([(r"abcdX$", "1ST"), (r"1ST$", "TOO_LONG") ], ":TOO_", False, id="1st_valid_2nd_truncated"), + # if the logic truncates the path twice, it ends up applying the default replacements + _p([(r"abcdX$", "TOO_LONG"), (r"TOO_$", "TOO_LONG") ], "_TOO_", True, id="both_truncated_default_repl_applied"), + ] + ) # fmt: skip + def test_replacements( + self, replacements, expected_path, expected_truncated + ): + replacements = [(re.compile(pat), repl) for pat, repl in replacements] + + assert util.legalize_path(":abcdX", replacements, "") == ( + expected_path, + expected_truncated, + )