Fix legalize_path types

Background
  The `_legalize_stage` function was causing issues with Mypy due to
  inconsistent type usage between the `path` and `extension` parameters.
  This inconsistency stemmed from the `fragment` parameter influencing the
  types of these variables.

Key issues
  1. `path` was defined as `str`, while `extension` was `bytes`.
  2. Depending on `fragment`, `extension` could be either `str` or `bytes`.
  3. `path` was sometimes converted to `bytes` within `_legalize_stage`.

Item.destination` method
  - The `fragment` parameter determined the output format:
    - `False`: Returned absolute path as bytes (default)
    - `True`: Returned path relative to library directory as str

Thus
  - Rename `fragment` parameter to `relative_to_libdir` for clarity
  - Ensure `Item.destination` returns `bytes` in all cases
  - Code expecting strings now converts the output to `str`
  - Use only `str` type in `_legalize_stage` and `_legalize_path`
    functions
  - These functions are no longer dependent on `relative_to_libdir`
This commit is contained in:
Šarūnas Nejus 2024-09-14 11:38:40 +01:00
parent ecdff785f7
commit 52951bf719
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
6 changed files with 58 additions and 89 deletions

View file

@ -1075,19 +1075,17 @@ class Item(LibModel):
def destination(
self,
fragment=False,
relative_to_libdir=False,
basedir=None,
platform=None,
path_formats=None,
replacements=None,
):
"""Return the path in the library directory designated for the
item (i.e., where the file ought to be).
) -> bytes:
"""Return the path in the library directory designated for the item
(i.e., where the file ought to be).
fragment makes this method return just the path fragment underneath
the root library directory; the path is also returned as Unicode
instead of encoded as a bytestring. basedir can override the library's
base directory for the destination.
The path is returned as a bytestring. ``basedir`` can override the
library's base directory for the destination.
"""
db = self._check_db()
platform = platform or sys.platform
@ -1137,14 +1135,13 @@ class Item(LibModel):
# When zero, try to determine from filesystem.
maxlen = util.max_filename_length(db.directory)
subpath, fellback = util.legalize_path(
lib_path_str, fallback = util.legalize_path(
subpath,
replacements,
maxlen,
os.path.splitext(self.path)[1],
fragment,
)
if fellback:
if fallback:
# Print an error message if legalization fell back to
# default replacements because of the maximum length.
log.warning(
@ -1153,11 +1150,12 @@ class Item(LibModel):
"the filename.",
subpath,
)
lib_path_bytes = util.bytestring_path(lib_path_str)
if fragment:
return util.as_string(subpath)
else:
return normpath(os.path.join(basedir, subpath))
if relative_to_libdir:
return lib_path_bytes
return normpath(os.path.join(basedir, lib_path_bytes))
class Album(LibModel):

View file

@ -712,25 +712,18 @@ def truncate_path(path: AnyStr, length: int = MAX_FILENAME_LENGTH) -> AnyStr:
def _legalize_stage(
path: str,
replacements: Replacements | None,
length: int,
extension: str,
fragment: bool,
) -> tuple[BytesOrStr, bool]:
path: str, replacements: Replacements | None, length: int, extension: str
) -> tuple[str, bool]:
"""Perform a single round of path legalization steps
(sanitation/replacement, encoding from Unicode to bytes,
extension-appending, and truncation). Return the path (Unicode if
`fragment` is set, `bytes` otherwise) and whether truncation was
required.
1. sanitation/replacement
2. appending the extension
3. truncation.
Return the path and whether truncation was required.
"""
# Perform an initial sanitization including user replacements.
path = sanitize_path(path, replacements)
# Encode for the filesystem.
if not fragment:
path = bytestring_path(path) # type: ignore
# Preserve extension.
path += extension.lower()
@ -742,57 +735,41 @@ def _legalize_stage(
def legalize_path(
path: str,
replacements: Replacements | None,
length: int,
extension: bytes,
fragment: bool,
) -> tuple[BytesOrStr, bool]:
"""Given a path-like Unicode string, produce a legal path. Return
the path and a flag indicating whether some replacements had to be
ignored (see below).
path: str, replacements: Replacements | None, length: int, extension: str
) -> tuple[str, bool]:
"""Given a path-like Unicode string, produce a legal path. Return the path
and a flag indicating whether some replacements had to be ignored (see
below).
The legalization process (see `_legalize_stage`) consists of
applying the sanitation rules in `replacements`, encoding the string
to bytes (unless `fragment` is set), truncating components to
`length`, appending the `extension`.
This function uses `_legalize_stage` function to legalize the path, see its
documentation for the details of what this involves. It is called up to
three times in case truncation conflicts with replacements (as can happen
when truncation creates whitespace at the end of the string, for example).
This function performs up to three calls to `_legalize_stage` in
case truncation conflicts with replacements (as can happen when
truncation creates whitespace at the end of the string, for
example). The limited number of iterations iterations avoids the
possibility of an infinite loop of sanitation and truncation
operations, which could be caused by replacement rules that make the
string longer. The flag returned from this function indicates that
the path has to be truncated twice (indicating that replacements
made the string longer again after it was truncated); the
application should probably log some sort of warning.
The limited number of iterations avoids the possibility of an infinite loop
of sanitation and truncation operations, which could be caused by
replacement rules that make the string longer.
The flag returned from this function indicates that the path has to be
truncated twice (indicating that replacements made the string longer again
after it was truncated); the application should probably log some sort of
warning.
"""
args = length, as_string(extension)
if fragment:
# Outputting Unicode.
extension = extension.decode("utf-8", "ignore")
first_stage_path, _ = _legalize_stage(
path, replacements, length, extension, fragment
first_stage, _ = os.path.splitext(
_legalize_stage(path, replacements, *args)[0]
)
# Convert back to Unicode with extension removed.
first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path))
# Re-sanitize following truncation (including user replacements).
second_stage_path, retruncated = _legalize_stage(
first_stage_path, replacements, length, extension, fragment
)
second_stage, truncated = _legalize_stage(first_stage, replacements, *args)
# If the path was once again truncated, discard user replacements
if not truncated:
return second_stage, False
# If the path was truncated, discard user replacements
# and run through one last legalization stage.
if retruncated:
second_stage_path, _ = _legalize_stage(
first_stage_path, None, length, extension, fragment
)
return second_stage_path, retruncated
return _legalize_stage(first_stage, None, *args)[0], True
def str2bool(value: str) -> bool:

View file

@ -49,7 +49,7 @@ def libtree(lib):
"""
root = Node({}, {})
for item in lib.items():
dest = item.destination(fragment=True)
parts = util.components(dest)
dest = item.destination(relative_to_libdir=True)
parts = util.components(util.as_string(dest))
_insert(root, parts, item.id)
return root

View file

@ -33,7 +33,7 @@ import beets.ui
from beets import dbcore, vfs
from beets.library import Item
from beets.plugins import BeetsPlugin
from beets.util import bluelet
from beets.util import as_string, bluelet
if TYPE_CHECKING:
from beets.dbcore.query import Query
@ -1130,7 +1130,7 @@ class Server(BaseServer):
def _item_info(self, item):
info_lines = [
"file: " + item.destination(fragment=True),
"file: " + as_string(item.destination(relative_to_libdir=True)),
"Time: " + str(int(item.length)),
"duration: " + f"{item.length:.3f}",
"Id: " + str(item.id),

View file

@ -624,13 +624,7 @@ class ConvertPlugin(BeetsPlugin):
# strings we get from item.destination to bytes.
items_paths = [
os.path.relpath(
util.bytestring_path(
item.destination(
basedir=dest,
path_formats=path_formats,
fragment=False,
)
),
item.destination(basedir=dest, path_formats=path_formats),
pl_dir,
)
for item in items

View file

@ -34,7 +34,7 @@ from beets.library import Album
from beets.test import _common
from beets.test._common import item
from beets.test.helper import BeetsTestCase, ItemInDBTestCase
from beets.util import bytestring_path, syspath
from beets.util import as_string, bytestring_path, syspath
# Shortcut to path normalization.
np = util.normpath
@ -411,14 +411,14 @@ class DestinationTest(BeetsTestCase):
def test_unicode_normalized_nfd_on_mac(self):
instr = unicodedata.normalize("NFC", "caf\xe9")
self.lib.path_formats = [("default", instr)]
dest = self.i.destination(platform="darwin", fragment=True)
assert dest == unicodedata.normalize("NFD", instr)
dest = self.i.destination(platform="darwin", relative_to_libdir=True)
assert as_string(dest) == unicodedata.normalize("NFD", instr)
def test_unicode_normalized_nfc_on_linux(self):
instr = unicodedata.normalize("NFD", "caf\xe9")
self.lib.path_formats = [("default", instr)]
dest = self.i.destination(platform="linux", fragment=True)
assert dest == unicodedata.normalize("NFC", instr)
dest = self.i.destination(platform="linux", relative_to_libdir=True)
assert as_string(dest) == unicodedata.normalize("NFC", instr)
def test_non_mbcs_characters_on_windows(self):
oldfunc = sys.getfilesystemencoding
@ -436,8 +436,8 @@ class DestinationTest(BeetsTestCase):
def test_unicode_extension_in_fragment(self):
self.lib.path_formats = [("default", "foo")]
self.i.path = util.bytestring_path("bar.caf\xe9")
dest = self.i.destination(platform="linux", fragment=True)
assert dest == "foo.caf\xe9"
dest = self.i.destination(platform="linux", relative_to_libdir=True)
assert as_string(dest) == "foo.caf\xe9"
def test_asciify_and_replace(self):
config["asciify_paths"] = True