From d07c8fde69d1dc3f4c25c2d315745e5e50d6e3fb Mon Sep 17 00:00:00 2001 From: Ben Ockmore Date: Mon, 16 Mar 2015 10:48:45 +0000 Subject: [PATCH 1/5] Added loop to iterate over sanitize/truncate until stable. Enabled test_truncation_does_not_conflict_with_replacement test. Fixes #496. --- beets/library.py | 86 +++++++++++++++++++++++++++++++++----------- test/test_library.py | 1 - 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/beets/library.py b/beets/library.py index 006630c08..0ba29cd68 100644 --- a/beets/library.py +++ b/beets/library.py @@ -742,6 +742,71 @@ class Item(LibModel): # Templating. + def _legalize_partial(self, path, fragment, replacements): + """ Perform a partial legalization of the path (ie. a single + sanitization and truncation). Outputs unicode if fragment is set, + otherwise bytes. + """ + + # Truncate components and remove forbidden characters. + path = util.sanitize_path(path, replacements) + + # Encode for the filesystem. + if not fragment: + path = bytestring_path(path) + + # Preserve extension. + _, extension = os.path.splitext(self.path) + if fragment: + # Outputting Unicode. + extension = extension.decode('utf8', 'ignore') + path += extension.lower() + + # Truncate too-long components. + maxlen = beets.config['max_filename_length'].get(int) + if not maxlen: + # When zero, try to determine from filesystem. + maxlen = util.max_filename_length(self._db.directory) + path = util.truncate_path(path, maxlen) + + return path + + def _legalize_path(self, path, fragment): + """ Perform several iterations of _legalize_partial, to generate a + stable, optimal output path. This is necessary for cases where + truncation produces unclean paths (eg. trailing space). + """ + + # Create a list of path candidates + path_candidates = [b''] + + replacements = self._db.replacements + + # Perform an initial pass + path = self._legalize_partial(path, fragment, replacements) + while path != path_candidates[-1]: + # This will keep sanitizing the path until it's stable, or an + # infinite loop appears + while path not in path_candidates: + path_candidates.append(path) + # Convert back to Unicode with extension removed + print(util.displayable_path(path)) + path = os.path.splitext(util.displayable_path(path))[0] + + # Run next pass + path = self._legalize_partial(path, fragment, replacements) + + # If an infinite loop occurred, adjust replacements to avoid it + if path != path_candidates[-1]: + replacements = dict((k, u'_') for k, v in replacements) + + # If there's a rule to match a single underscore, set the + # target to a blank string. + if '_' in replacements: + replacements['_'] = '' + + return path + def destination(self, fragment=False, basedir=None, platform=None, path_formats=None): """Returns the path in the library directory designated for the @@ -790,26 +855,7 @@ class Item(LibModel): if beets.config['asciify_paths']: subpath = unidecode(subpath) - # Truncate components and remove forbidden characters. - subpath = util.sanitize_path(subpath, self._db.replacements) - - # Encode for the filesystem. - if not fragment: - subpath = bytestring_path(subpath) - - # Preserve extension. - _, extension = os.path.splitext(self.path) - if fragment: - # Outputting Unicode. - extension = extension.decode('utf8', 'ignore') - subpath += extension.lower() - - # Truncate too-long components. - maxlen = beets.config['max_filename_length'].get(int) - if not maxlen: - # When zero, try to determine from filesystem. - maxlen = util.max_filename_length(self._db.directory) - subpath = util.truncate_path(subpath, maxlen) + subpath = self._legalize_path(subpath, fragment) if fragment: return subpath diff --git a/test/test_library.py b/test/test_library.py index a4b4d08a8..e66ad28f0 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -452,7 +452,6 @@ class DestinationTest(_common.TestCase): self.assertEqual(self.i.destination(), np('base/one/_.mp3')) - @unittest.skip('unimplemented: #496') def test_truncation_does_not_conflict_with_replacement(self): # Use a replacement that should always replace the last X in any # path component with a Z. From de17d000bda4c518bc5191f09e379624d7068cbb Mon Sep 17 00:00:00 2001 From: Ben Ockmore Date: Tue, 7 Jul 2015 01:17:12 +0100 Subject: [PATCH 2/5] Rewrote path legalization code to be two module functions rather than class methods. Also made algorithm more predictable, and added an extra test. --- beets/library.py | 75 +++++------------------------------------- beets/util/__init__.py | 53 +++++++++++++++++++++++++++++ test/test_library.py | 20 +++++++++-- 3 files changed, 79 insertions(+), 69 deletions(-) diff --git a/beets/library.py b/beets/library.py index 0ba29cd68..fc51a6c6a 100644 --- a/beets/library.py +++ b/beets/library.py @@ -30,7 +30,7 @@ from beets import logging from beets.mediafile import MediaFile, MutagenError, UnreadableFileError from beets import plugins from beets import util -from beets.util import bytestring_path, syspath, normpath, samefile +from beets.util import bytestring_path, syspath, normpath, samefile, legalize_path from beets.util.functemplate import Template from beets import dbcore from beets.dbcore import types @@ -742,71 +742,6 @@ class Item(LibModel): # Templating. - def _legalize_partial(self, path, fragment, replacements): - """ Perform a partial legalization of the path (ie. a single - sanitization and truncation). Outputs unicode if fragment is set, - otherwise bytes. - """ - - # Truncate components and remove forbidden characters. - path = util.sanitize_path(path, replacements) - - # Encode for the filesystem. - if not fragment: - path = bytestring_path(path) - - # Preserve extension. - _, extension = os.path.splitext(self.path) - if fragment: - # Outputting Unicode. - extension = extension.decode('utf8', 'ignore') - path += extension.lower() - - # Truncate too-long components. - maxlen = beets.config['max_filename_length'].get(int) - if not maxlen: - # When zero, try to determine from filesystem. - maxlen = util.max_filename_length(self._db.directory) - path = util.truncate_path(path, maxlen) - - return path - - def _legalize_path(self, path, fragment): - """ Perform several iterations of _legalize_partial, to generate a - stable, optimal output path. This is necessary for cases where - truncation produces unclean paths (eg. trailing space). - """ - - # Create a list of path candidates - path_candidates = [b''] - - replacements = self._db.replacements - - # Perform an initial pass - path = self._legalize_partial(path, fragment, replacements) - while path != path_candidates[-1]: - # This will keep sanitizing the path until it's stable, or an - # infinite loop appears - while path not in path_candidates: - path_candidates.append(path) - # Convert back to Unicode with extension removed - print(util.displayable_path(path)) - path = os.path.splitext(util.displayable_path(path))[0] - - # Run next pass - path = self._legalize_partial(path, fragment, replacements) - - # If an infinite loop occurred, adjust replacements to avoid it - if path != path_candidates[-1]: - replacements = dict((k, u'_') for k, v in replacements) - - # If there's a rule to match a single underscore, set the - # target to a blank string. - if '_' in replacements: - replacements['_'] = '' - - return path - def destination(self, fragment=False, basedir=None, platform=None, path_formats=None): """Returns the path in the library directory designated for the @@ -855,7 +790,13 @@ class Item(LibModel): if beets.config['asciify_paths']: subpath = unidecode(subpath) - subpath = self._legalize_path(subpath, fragment) + maxlen = beets.config['max_filename_length'].get(int) + if not maxlen: + # When zero, try to determine from filesystem. + maxlen = util.max_filename_length(self._db.directory) + + subpath = util.legalize_path(subpath, self._db.replacements, maxlen, + os.path.splitext(self.path)[1], fragment) if fragment: return subpath diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 68be740f6..a82bb2c89 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -545,6 +545,59 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH): return os.path.join(*out) +def _legalize_stage(path, replacements, length, extension, fragment): + """ Perform a signle stage of legalization of the path (ie. a single + sanitization and truncation). Returns the path (unicode if fragment is set, + otherwise bytes), and whether truncation was required. + """ + # Perform an initial sanitization including user replacements. + path = sanitize_path(path, replacements) + + # Encode for the filesystem. + if not fragment: + path = bytestring_path(path) + + # Preserve extension. + path += extension.lower() + + # Truncate too-long components. + pre_truncate_path = path + path = truncate_path(path, length) + + return (path, path != pre_truncate_path) + + +def legalize_path(path, replacements, length, extension, fragment): + """ Perform up to three calls to _legalize_stage, to generate a stable + output path, taking user replacements into consideration if possible. This + additional complexity is necessary for cases where truncation produces + unclean paths (eg. trailing space). + """ + + if fragment: + # Outputting Unicode. + extension = extension.decode('utf8', 'ignore') + + first_stage_path =\ + _legalize_stage(path, replacements, length, extension, fragment)[0] + + # Convert back to Unicode with extension removed. + first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0] + + # Re-sanitize following truncation (including user replacements). + second_stage_path, retruncated = _legalize_stage( + first_stage_path, replacements, length, extension, fragment + ) + + # If the path was once again truncated, discard user replacements. + if retruncated: + second_stage_path = _legalize_stage( + first_stage_path, None, length, extension, fragment + )[0] + + return second_stage_path + + def str2bool(value): """Returns a boolean reflecting a human-entered string.""" return value.lower() in ('yes', '1', 'true', 't', 'y') diff --git a/test/test_library.py b/test/test_library.py index e66ad28f0..30b0d6fc4 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -452,7 +452,7 @@ class DestinationTest(_common.TestCase): self.assertEqual(self.i.destination(), np('base/one/_.mp3')) - def test_truncation_does_not_conflict_with_replacement(self): + def test_legalize_path_one_for_one_replacement(self): # Use a replacement that should always replace the last X in any # path component with a Z. self.lib.replacements = [ @@ -465,7 +465,23 @@ class DestinationTest(_common.TestCase): # The final path should reflect the replacement. dest = self.i.destination() - self.assertTrue('XZ' in dest) + self.assertEqual(dest[-2:], 'XZ') + + def test_legalize_path_one_for_many_replacement(self): + # Use a replacement that should always replace the last X in any + # path component with four Zs. + self.lib.replacements = [ + (re.compile(r'X$'), u'ZZZZ'), + ] + + # Construct an item whose untruncated path ends with a Y but whose + # truncated version ends with an X. + self.i.title = 'X' * 300 + 'Y' + + # The final path should ignore the user replacement and create a path + # of the correct length, containing Xs. + dest = self.i.destination() + self.assertEqual(dest[-2:], 'XX') class ItemFormattedMappingTest(_common.LibTestCase): From 22b2527eed95ded41d5a650c27eb9d3dd0300e31 Mon Sep 17 00:00:00 2001 From: Ben Ockmore Date: Tue, 7 Jul 2015 01:28:16 +0100 Subject: [PATCH 3/5] Remove unused import. --- beets/library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/library.py b/beets/library.py index fc51a6c6a..9d491a66b 100644 --- a/beets/library.py +++ b/beets/library.py @@ -30,7 +30,7 @@ from beets import logging from beets.mediafile import MediaFile, MutagenError, UnreadableFileError from beets import plugins from beets import util -from beets.util import bytestring_path, syspath, normpath, samefile, legalize_path +from beets.util import bytestring_path, syspath, normpath, samefile from beets.util.functemplate import Template from beets import dbcore from beets.dbcore import types From b479982043d319c1f22b0b428af93631db3d0938 Mon Sep 17 00:00:00 2001 From: Ben Ockmore Date: Tue, 7 Jul 2015 12:02:19 +0100 Subject: [PATCH 4/5] Minor changes suggested suggested in PR comments. --- beets/util/__init__.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index a82bb2c89..bf0abdfe5 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -546,9 +546,9 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH): def _legalize_stage(path, replacements, length, extension, fragment): - """ Perform a signle stage of legalization of the path (ie. a single - sanitization and truncation). Returns the path (unicode if fragment is set, - otherwise bytes), and whether truncation was required. + """ Performs sanitization of the path, then encodes for the filesystem, + appends the extension and truncates. Returns the path (unicode if fragment + is set, otherwise bytes), and whether truncation was required. """ # Perform an initial sanitization including user replacements. path = sanitize_path(path, replacements) @@ -564,22 +564,24 @@ def _legalize_stage(path, replacements, length, extension, fragment): pre_truncate_path = path path = truncate_path(path, length) - return (path, path != pre_truncate_path) + return path, path != pre_truncate_path def legalize_path(path, replacements, length, extension, fragment): """ Perform up to three calls to _legalize_stage, to generate a stable - output path, taking user replacements into consideration if possible. This - additional complexity is necessary for cases where truncation produces - unclean paths (eg. trailing space). + output path, taking user replacements into consideration if possible. The + limited number of iterations avoids the possibility of an infinite loop of + sanitization and truncation operations, which could be caused by some user + replacements. """ if fragment: # Outputting Unicode. extension = extension.decode('utf8', 'ignore') - first_stage_path =\ - _legalize_stage(path, replacements, length, extension, fragment)[0] + first_stage_path, _ = _legalize_stage( + path, replacements, length, extension, fragment + ) # Convert back to Unicode with extension removed. first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0] @@ -591,9 +593,9 @@ def legalize_path(path, replacements, length, extension, fragment): # If the path was once again truncated, discard user replacements. if retruncated: - second_stage_path = _legalize_stage( + second_stage_path, _ = _legalize_stage( first_stage_path, None, length, extension, fragment - )[0] + ) return second_stage_path From 1f1e0f7240fc418af13d848b7a37a00a46a1dcf8 Mon Sep 17 00:00:00 2001 From: Ben Ockmore Date: Tue, 7 Jul 2015 17:46:42 +0100 Subject: [PATCH 5/5] Added warning message and paragraph about replacements/max length interaction in documentation. --- beets/library.py | 10 ++++++++-- beets/util/__init__.py | 4 +++- docs/reference/config.rst | 5 +++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/beets/library.py b/beets/library.py index 9d491a66b..10e54b2a1 100644 --- a/beets/library.py +++ b/beets/library.py @@ -795,8 +795,14 @@ class Item(LibModel): # When zero, try to determine from filesystem. maxlen = util.max_filename_length(self._db.directory) - subpath = util.legalize_path(subpath, self._db.replacements, maxlen, - os.path.splitext(self.path)[1], fragment) + subpath, fellback = util.legalize_path( + subpath, self._db.replacements, maxlen, + os.path.splitext(self.path)[1], fragment + ) + + # Print an error message if legalize fell back to default replacements + if fellback: + log.warning(u'fell back to default replacements when naming file') if fragment: return subpath diff --git a/beets/util/__init__.py b/beets/util/__init__.py index bf0abdfe5..42971fdf6 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -597,7 +597,9 @@ def legalize_path(path, replacements, length, extension, fragment): first_stage_path, None, length, extension, fragment ) - return second_stage_path + return second_stage_path, True + else: + return second_stage_path, False def str2bool(value): diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d737bba69..9000f070a 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -119,6 +119,11 @@ compatibility with Windows-influenced network filesystems like Samba). Trailing dots and trailing whitespace, which can cause problems on Windows clients, are also removed. +When replacements other than the defaults are used, it is possible that they +will increase the length of the path. In the scenario where this leads to a +conflict with the maximum filename length, the default replacements will be +used to resolve the conflict and beets will display a warning. + Note that paths might contain special characters such as typographical quotes (``“”``). With the configuration above, those will not be replaced as they don't match the typewriter quote (``"``). To also strip these