diff --git a/beets/library.py b/beets/library.py index 006630c08..10e54b2a1 100644 --- a/beets/library.py +++ b/beets/library.py @@ -790,26 +790,19 @@ class Item(LibModel): if beets.config['asciify_paths']: subpath = unidecode(subpath) - # Truncate components and remove forbidden characters. - subpath = util.sanitize_path(subpath, self._db.replacements) - - # Encode for the filesystem. - if not fragment: - subpath = bytestring_path(subpath) - - # Preserve extension. - _, extension = os.path.splitext(self.path) - if fragment: - # Outputting Unicode. - extension = extension.decode('utf8', 'ignore') - subpath += extension.lower() - - # Truncate too-long components. maxlen = beets.config['max_filename_length'].get(int) if not maxlen: # When zero, try to determine from filesystem. maxlen = util.max_filename_length(self._db.directory) - subpath = util.truncate_path(subpath, maxlen) + + subpath, fellback = util.legalize_path( + subpath, self._db.replacements, maxlen, + os.path.splitext(self.path)[1], fragment + ) + + # Print an error message if legalize fell back to default replacements + if fellback: + log.warning(u'fell back to default replacements when naming file') if fragment: return subpath diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 68be740f6..42971fdf6 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -545,6 +545,63 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH): return os.path.join(*out) +def _legalize_stage(path, replacements, length, extension, fragment): + """ Performs sanitization of the path, then encodes for the filesystem, + appends the extension and truncates. Returns the path (unicode if fragment + is set, otherwise bytes), and whether truncation was required. + """ + # Perform an initial sanitization including user replacements. + path = sanitize_path(path, replacements) + + # Encode for the filesystem. + if not fragment: + path = bytestring_path(path) + + # Preserve extension. + path += extension.lower() + + # Truncate too-long components. + pre_truncate_path = path + path = truncate_path(path, length) + + return path, path != pre_truncate_path + + +def legalize_path(path, replacements, length, extension, fragment): + """ Perform up to three calls to _legalize_stage, to generate a stable + output path, taking user replacements into consideration if possible. The + limited number of iterations avoids the possibility of an infinite loop of + sanitization and truncation operations, which could be caused by some user + replacements. + """ + + if fragment: + # Outputting Unicode. + extension = extension.decode('utf8', 'ignore') + + first_stage_path, _ = _legalize_stage( + path, replacements, length, extension, fragment + ) + + # Convert back to Unicode with extension removed. + first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0] + + # Re-sanitize following truncation (including user replacements). + second_stage_path, retruncated = _legalize_stage( + first_stage_path, replacements, length, extension, fragment + ) + + # If the path was once again truncated, discard user replacements. + if retruncated: + second_stage_path, _ = _legalize_stage( + first_stage_path, None, length, extension, fragment + ) + + return second_stage_path, True + else: + return second_stage_path, False + + def str2bool(value): """Returns a boolean reflecting a human-entered string.""" return value.lower() in ('yes', '1', 'true', 't', 'y') diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d737bba69..9000f070a 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -119,6 +119,11 @@ compatibility with Windows-influenced network filesystems like Samba). Trailing dots and trailing whitespace, which can cause problems on Windows clients, are also removed. +When replacements other than the defaults are used, it is possible that they +will increase the length of the path. In the scenario where this leads to a +conflict with the maximum filename length, the default replacements will be +used to resolve the conflict and beets will display a warning. + Note that paths might contain special characters such as typographical quotes (``“”``). With the configuration above, those will not be replaced as they don't match the typewriter quote (``"``). To also strip these diff --git a/test/test_library.py b/test/test_library.py index a4b4d08a8..30b0d6fc4 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -452,8 +452,7 @@ class DestinationTest(_common.TestCase): self.assertEqual(self.i.destination(), np('base/one/_.mp3')) - @unittest.skip('unimplemented: #496') - def test_truncation_does_not_conflict_with_replacement(self): + def test_legalize_path_one_for_one_replacement(self): # Use a replacement that should always replace the last X in any # path component with a Z. self.lib.replacements = [ @@ -466,7 +465,23 @@ class DestinationTest(_common.TestCase): # The final path should reflect the replacement. dest = self.i.destination() - self.assertTrue('XZ' in dest) + self.assertEqual(dest[-2:], 'XZ') + + def test_legalize_path_one_for_many_replacement(self): + # Use a replacement that should always replace the last X in any + # path component with four Zs. + self.lib.replacements = [ + (re.compile(r'X$'), u'ZZZZ'), + ] + + # Construct an item whose untruncated path ends with a Y but whose + # truncated version ends with an X. + self.i.title = 'X' * 300 + 'Y' + + # The final path should ignore the user replacement and create a path + # of the correct length, containing Xs. + dest = self.i.destination() + self.assertEqual(dest[-2:], 'XX') class ItemFormattedMappingTest(_common.LibTestCase):