Rewrote path legalization code to be two module functions rather than class methods. Also made algorithm more predictable, and added an extra test.

2026-02-22 07:14:24 +01:00 · 2015-07-07 01:17:12 +01:00 · 2015-07-07 01:17:12 +01:00 · de17d000bd
commit de17d000bd
parent d07c8fde69
3 changed files with 79 additions and 69 deletions
--- a/beets/library.py
+++ b/beets/library.py
@ -30,7 +30,7 @@ from beets import logging
 from beets.mediafile import MediaFile, MutagenError, UnreadableFileError
 from beets import plugins
 from beets import util
-from beets.util import bytestring_path, syspath, normpath, samefile
+from beets.util import bytestring_path, syspath, normpath, samefile, legalize_path
 from beets.util.functemplate import Template
 from beets import dbcore
 from beets.dbcore import types
@ -742,71 +742,6 @@ class Item(LibModel):

    # Templating.

-    def _legalize_partial(self, path, fragment, replacements):
-        """ Perform a partial legalization of the path (ie. a single
-        sanitization and truncation). Outputs unicode if fragment is set,
-        otherwise bytes.
-        """
-
-        # Truncate components and remove forbidden characters.
-        path = util.sanitize_path(path, replacements)
-
-        # Encode for the filesystem.
-        if not fragment:
-            path = bytestring_path(path)
-
-        # Preserve extension.
-        _, extension = os.path.splitext(self.path)
-        if fragment:
-            # Outputting Unicode.
-            extension = extension.decode('utf8', 'ignore')
-        path += extension.lower()
-
-        # Truncate too-long components.
-        maxlen = beets.config['max_filename_length'].get(int)
-        if not maxlen:
-            # When zero, try to determine from filesystem.
-            maxlen = util.max_filename_length(self._db.directory)
-        path = util.truncate_path(path, maxlen)
-
-        return path
-
-    def _legalize_path(self, path, fragment):
-        """ Perform several iterations of _legalize_partial, to generate a
-        stable, optimal output path. This is necessary for cases where
-        truncation produces unclean paths (eg. trailing space).
-        """
-
-        # Create a list of path candidates
-        path_candidates = [b'']
-
-        replacements = self._db.replacements
-
-        # Perform an initial pass
-        path = self._legalize_partial(path, fragment, replacements)
-        while path != path_candidates[-1]:
-            # This will keep sanitizing the path until it's stable, or an
-            # infinite loop appears
-            while path not in path_candidates:
-                path_candidates.append(path)
-                # Convert back to Unicode with extension removed
-                print(util.displayable_path(path))
-                path = os.path.splitext(util.displayable_path(path))[0]
-
-                # Run next pass
-                path = self._legalize_partial(path, fragment, replacements)
-
-            # If an infinite loop occurred, adjust replacements to avoid it
-            if path != path_candidates[-1]:
-                replacements = dict((k, u'_') for k, v in replacements)
-
-                # If there's a rule to match a single underscore, set the
-                # target to a blank string.
-                if '_' in replacements:
-                    replacements['_'] = ''
-
-        return path
-
    def destination(self, fragment=False, basedir=None, platform=None,
                    path_formats=None):
        """Returns the path in the library directory designated for the
@ -855,7 +790,13 @@ class Item(LibModel):
        if beets.config['asciify_paths']:
            subpath = unidecode(subpath)

-        subpath = self._legalize_path(subpath, fragment)
+        maxlen = beets.config['max_filename_length'].get(int)
+        if not maxlen:
+            # When zero, try to determine from filesystem.
+            maxlen = util.max_filename_length(self._db.directory)
+
+        subpath = util.legalize_path(subpath, self._db.replacements, maxlen,
+                                     os.path.splitext(self.path)[1], fragment)

        if fragment:
            return subpath
--- a/beets/util/init.py
+++ b/beets/util/init.py
@ -545,6 +545,59 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH):
    return os.path.join(*out)


+def _legalize_stage(path, replacements, length, extension, fragment):
+    """ Perform a signle stage of legalization of the path (ie. a single
+    sanitization and truncation). Returns the path (unicode if fragment is set,
+    otherwise bytes), and whether truncation was required.
+    """
+    # Perform an initial sanitization including user replacements.
+    path = sanitize_path(path, replacements)
+
+    # Encode for the filesystem.
+    if not fragment:
+        path = bytestring_path(path)
+
+    # Preserve extension.
+    path += extension.lower()
+
+    # Truncate too-long components.
+    pre_truncate_path = path
+    path = truncate_path(path, length)
+
+    return (path, path != pre_truncate_path)
+
+
+def legalize_path(path, replacements, length, extension, fragment):
+    """ Perform up to three calls to _legalize_stage, to generate a stable
+    output path, taking user replacements into consideration if possible. This
+    additional complexity is necessary for cases where truncation produces
+    unclean paths (eg. trailing space).
+    """
+
+    if fragment:
+        # Outputting Unicode.
+        extension = extension.decode('utf8', 'ignore')
+
+    first_stage_path =\
+        _legalize_stage(path, replacements, length, extension, fragment)[0]
+
+    # Convert back to Unicode with extension removed.
+    first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0]
+
+    # Re-sanitize following truncation (including user replacements).
+    second_stage_path, retruncated = _legalize_stage(
+        first_stage_path, replacements, length, extension, fragment
+    )
+
+    # If the path was once again truncated, discard user replacements.
+    if retruncated:
+        second_stage_path = _legalize_stage(
+            first_stage_path, None, length, extension, fragment
+        )[0]
+
+    return second_stage_path
+
+
 def str2bool(value):
    """Returns a boolean reflecting a human-entered string."""
    return value.lower() in ('yes', '1', 'true', 't', 'y')
--- a/test/test_library.py
+++ b/test/test_library.py
@ -452,7 +452,7 @@ class DestinationTest(_common.TestCase):
        self.assertEqual(self.i.destination(),
                         np('base/one/_.mp3'))

-    def test_truncation_does_not_conflict_with_replacement(self):
+    def test_legalize_path_one_for_one_replacement(self):
        # Use a replacement that should always replace the last X in any
        # path component with a Z.
        self.lib.replacements = [
@ -465,7 +465,23 @@ class DestinationTest(_common.TestCase):

        # The final path should reflect the replacement.
        dest = self.i.destination()
-        self.assertTrue('XZ' in dest)
+        self.assertEqual(dest[-2:], 'XZ')
+
+    def test_legalize_path_one_for_many_replacement(self):
+        # Use a replacement that should always replace the last X in any
+        # path component with four Zs.
+        self.lib.replacements = [
+            (re.compile(r'X$'), u'ZZZZ'),
+        ]
+
+        # Construct an item whose untruncated path ends with a Y but whose
+        # truncated version ends with an X.
+        self.i.title = 'X' * 300 + 'Y'
+
+        # The final path should ignore the user replacement and create a path
+        # of the correct length, containing Xs.
+        dest = self.i.destination()
+        self.assertEqual(dest[-2:], 'XX')


 class ItemFormattedMappingTest(_common.LibTestCase):