Fix legalize_path types

Background
  The `_legalize_stage` function was causing issues with Mypy due to
  inconsistent type usage between the `path` and `extension` parameters.
  This inconsistency stemmed from the `fragment` parameter influencing the
  types of these variables.

Key issues
  1. `path` was defined as `str`, while `extension` was `bytes`.
  2. Depending on `fragment`, `extension` could be either `str` or `bytes`.
  3. `path` was sometimes converted to `bytes` within `_legalize_stage`.

Item.destination` method
  - The `fragment` parameter determined the output format:
    - `False`: Returned absolute path as bytes (default)
    - `True`: Returned path relative to library directory as str

Thus
  - Rename `fragment` parameter to `relative_to_libdir` for clarity
  - Ensure `Item.destination` returns `bytes` in all cases
  - Code expecting strings now converts the output to `str`
  - Use only `str` type in `_legalize_stage` and `_legalize_path`
    functions
  - These functions are no longer dependent on `relative_to_libdir`
This commit is contained in:
Šarūnas Nejus 2024-09-14 11:38:40 +01:00
parent ecdff785f7
commit 52951bf719
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
6 changed files with 58 additions and 89 deletions

View file

@ -1075,19 +1075,17 @@ class Item(LibModel):
def destination( def destination(
self, self,
fragment=False, relative_to_libdir=False,
basedir=None, basedir=None,
platform=None, platform=None,
path_formats=None, path_formats=None,
replacements=None, replacements=None,
): ) -> bytes:
"""Return the path in the library directory designated for the """Return the path in the library directory designated for the item
item (i.e., where the file ought to be). (i.e., where the file ought to be).
fragment makes this method return just the path fragment underneath The path is returned as a bytestring. ``basedir`` can override the
the root library directory; the path is also returned as Unicode library's base directory for the destination.
instead of encoded as a bytestring. basedir can override the library's
base directory for the destination.
""" """
db = self._check_db() db = self._check_db()
platform = platform or sys.platform platform = platform or sys.platform
@ -1137,14 +1135,13 @@ class Item(LibModel):
# When zero, try to determine from filesystem. # When zero, try to determine from filesystem.
maxlen = util.max_filename_length(db.directory) maxlen = util.max_filename_length(db.directory)
subpath, fellback = util.legalize_path( lib_path_str, fallback = util.legalize_path(
subpath, subpath,
replacements, replacements,
maxlen, maxlen,
os.path.splitext(self.path)[1], os.path.splitext(self.path)[1],
fragment,
) )
if fellback: if fallback:
# Print an error message if legalization fell back to # Print an error message if legalization fell back to
# default replacements because of the maximum length. # default replacements because of the maximum length.
log.warning( log.warning(
@ -1153,11 +1150,12 @@ class Item(LibModel):
"the filename.", "the filename.",
subpath, subpath,
) )
lib_path_bytes = util.bytestring_path(lib_path_str)
if fragment: if relative_to_libdir:
return util.as_string(subpath) return lib_path_bytes
else:
return normpath(os.path.join(basedir, subpath)) return normpath(os.path.join(basedir, lib_path_bytes))
class Album(LibModel): class Album(LibModel):

View file

@ -712,25 +712,18 @@ def truncate_path(path: AnyStr, length: int = MAX_FILENAME_LENGTH) -> AnyStr:
def _legalize_stage( def _legalize_stage(
path: str, path: str, replacements: Replacements | None, length: int, extension: str
replacements: Replacements | None, ) -> tuple[str, bool]:
length: int,
extension: str,
fragment: bool,
) -> tuple[BytesOrStr, bool]:
"""Perform a single round of path legalization steps """Perform a single round of path legalization steps
(sanitation/replacement, encoding from Unicode to bytes, 1. sanitation/replacement
extension-appending, and truncation). Return the path (Unicode if 2. appending the extension
`fragment` is set, `bytes` otherwise) and whether truncation was 3. truncation.
required.
Return the path and whether truncation was required.
""" """
# Perform an initial sanitization including user replacements. # Perform an initial sanitization including user replacements.
path = sanitize_path(path, replacements) path = sanitize_path(path, replacements)
# Encode for the filesystem.
if not fragment:
path = bytestring_path(path) # type: ignore
# Preserve extension. # Preserve extension.
path += extension.lower() path += extension.lower()
@ -742,57 +735,41 @@ def _legalize_stage(
def legalize_path( def legalize_path(
path: str, path: str, replacements: Replacements | None, length: int, extension: str
replacements: Replacements | None, ) -> tuple[str, bool]:
length: int, """Given a path-like Unicode string, produce a legal path. Return the path
extension: bytes, and a flag indicating whether some replacements had to be ignored (see
fragment: bool, below).
) -> tuple[BytesOrStr, bool]:
"""Given a path-like Unicode string, produce a legal path. Return
the path and a flag indicating whether some replacements had to be
ignored (see below).
The legalization process (see `_legalize_stage`) consists of This function uses `_legalize_stage` function to legalize the path, see its
applying the sanitation rules in `replacements`, encoding the string documentation for the details of what this involves. It is called up to
to bytes (unless `fragment` is set), truncating components to three times in case truncation conflicts with replacements (as can happen
`length`, appending the `extension`. when truncation creates whitespace at the end of the string, for example).
This function performs up to three calls to `_legalize_stage` in The limited number of iterations avoids the possibility of an infinite loop
case truncation conflicts with replacements (as can happen when of sanitation and truncation operations, which could be caused by
truncation creates whitespace at the end of the string, for replacement rules that make the string longer.
example). The limited number of iterations iterations avoids the
possibility of an infinite loop of sanitation and truncation The flag returned from this function indicates that the path has to be
operations, which could be caused by replacement rules that make the truncated twice (indicating that replacements made the string longer again
string longer. The flag returned from this function indicates that after it was truncated); the application should probably log some sort of
the path has to be truncated twice (indicating that replacements warning.
made the string longer again after it was truncated); the
application should probably log some sort of warning.
""" """
args = length, as_string(extension)
if fragment: first_stage, _ = os.path.splitext(
# Outputting Unicode. _legalize_stage(path, replacements, *args)[0]
extension = extension.decode("utf-8", "ignore")
first_stage_path, _ = _legalize_stage(
path, replacements, length, extension, fragment
) )
# Convert back to Unicode with extension removed.
first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path))
# Re-sanitize following truncation (including user replacements). # Re-sanitize following truncation (including user replacements).
second_stage_path, retruncated = _legalize_stage( second_stage, truncated = _legalize_stage(first_stage, replacements, *args)
first_stage_path, replacements, length, extension, fragment
)
# If the path was once again truncated, discard user replacements if not truncated:
return second_stage, False
# If the path was truncated, discard user replacements
# and run through one last legalization stage. # and run through one last legalization stage.
if retruncated: return _legalize_stage(first_stage, None, *args)[0], True
second_stage_path, _ = _legalize_stage(
first_stage_path, None, length, extension, fragment
)
return second_stage_path, retruncated
def str2bool(value: str) -> bool: def str2bool(value: str) -> bool:

View file

@ -49,7 +49,7 @@ def libtree(lib):
""" """
root = Node({}, {}) root = Node({}, {})
for item in lib.items(): for item in lib.items():
dest = item.destination(fragment=True) dest = item.destination(relative_to_libdir=True)
parts = util.components(dest) parts = util.components(util.as_string(dest))
_insert(root, parts, item.id) _insert(root, parts, item.id)
return root return root

View file

@ -33,7 +33,7 @@ import beets.ui
from beets import dbcore, vfs from beets import dbcore, vfs
from beets.library import Item from beets.library import Item
from beets.plugins import BeetsPlugin from beets.plugins import BeetsPlugin
from beets.util import bluelet from beets.util import as_string, bluelet
if TYPE_CHECKING: if TYPE_CHECKING:
from beets.dbcore.query import Query from beets.dbcore.query import Query
@ -1130,7 +1130,7 @@ class Server(BaseServer):
def _item_info(self, item): def _item_info(self, item):
info_lines = [ info_lines = [
"file: " + item.destination(fragment=True), "file: " + as_string(item.destination(relative_to_libdir=True)),
"Time: " + str(int(item.length)), "Time: " + str(int(item.length)),
"duration: " + f"{item.length:.3f}", "duration: " + f"{item.length:.3f}",
"Id: " + str(item.id), "Id: " + str(item.id),

View file

@ -624,13 +624,7 @@ class ConvertPlugin(BeetsPlugin):
# strings we get from item.destination to bytes. # strings we get from item.destination to bytes.
items_paths = [ items_paths = [
os.path.relpath( os.path.relpath(
util.bytestring_path( item.destination(basedir=dest, path_formats=path_formats),
item.destination(
basedir=dest,
path_formats=path_formats,
fragment=False,
)
),
pl_dir, pl_dir,
) )
for item in items for item in items

View file

@ -34,7 +34,7 @@ from beets.library import Album
from beets.test import _common from beets.test import _common
from beets.test._common import item from beets.test._common import item
from beets.test.helper import BeetsTestCase, ItemInDBTestCase from beets.test.helper import BeetsTestCase, ItemInDBTestCase
from beets.util import bytestring_path, syspath from beets.util import as_string, bytestring_path, syspath
# Shortcut to path normalization. # Shortcut to path normalization.
np = util.normpath np = util.normpath
@ -411,14 +411,14 @@ class DestinationTest(BeetsTestCase):
def test_unicode_normalized_nfd_on_mac(self): def test_unicode_normalized_nfd_on_mac(self):
instr = unicodedata.normalize("NFC", "caf\xe9") instr = unicodedata.normalize("NFC", "caf\xe9")
self.lib.path_formats = [("default", instr)] self.lib.path_formats = [("default", instr)]
dest = self.i.destination(platform="darwin", fragment=True) dest = self.i.destination(platform="darwin", relative_to_libdir=True)
assert dest == unicodedata.normalize("NFD", instr) assert as_string(dest) == unicodedata.normalize("NFD", instr)
def test_unicode_normalized_nfc_on_linux(self): def test_unicode_normalized_nfc_on_linux(self):
instr = unicodedata.normalize("NFD", "caf\xe9") instr = unicodedata.normalize("NFD", "caf\xe9")
self.lib.path_formats = [("default", instr)] self.lib.path_formats = [("default", instr)]
dest = self.i.destination(platform="linux", fragment=True) dest = self.i.destination(platform="linux", relative_to_libdir=True)
assert dest == unicodedata.normalize("NFC", instr) assert as_string(dest) == unicodedata.normalize("NFC", instr)
def test_non_mbcs_characters_on_windows(self): def test_non_mbcs_characters_on_windows(self):
oldfunc = sys.getfilesystemencoding oldfunc = sys.getfilesystemencoding
@ -436,8 +436,8 @@ class DestinationTest(BeetsTestCase):
def test_unicode_extension_in_fragment(self): def test_unicode_extension_in_fragment(self):
self.lib.path_formats = [("default", "foo")] self.lib.path_formats = [("default", "foo")]
self.i.path = util.bytestring_path("bar.caf\xe9") self.i.path = util.bytestring_path("bar.caf\xe9")
dest = self.i.destination(platform="linux", fragment=True) dest = self.i.destination(platform="linux", relative_to_libdir=True)
assert dest == "foo.caf\xe9" assert as_string(dest) == "foo.caf\xe9"
def test_asciify_and_replace(self): def test_asciify_and_replace(self):
config["asciify_paths"] = True config["asciify_paths"] = True