Merge pull request #1361 from LordSputnik/master

Fixed issue #496 - sanitize/truncate bug
This commit is contained in:
Adrian Sampson 2015-07-07 17:33:17 -07:00
commit d766b6bd7c
4 changed files with 89 additions and 19 deletions

View file

@ -790,26 +790,19 @@ class Item(LibModel):
if beets.config['asciify_paths']:
subpath = unidecode(subpath)
# Truncate components and remove forbidden characters.
subpath = util.sanitize_path(subpath, self._db.replacements)
# Encode for the filesystem.
if not fragment:
subpath = bytestring_path(subpath)
# Preserve extension.
_, extension = os.path.splitext(self.path)
if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')
subpath += extension.lower()
# Truncate too-long components.
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
# When zero, try to determine from filesystem.
maxlen = util.max_filename_length(self._db.directory)
subpath = util.truncate_path(subpath, maxlen)
subpath, fellback = util.legalize_path(
subpath, self._db.replacements, maxlen,
os.path.splitext(self.path)[1], fragment
)
# Print an error message if legalize fell back to default replacements
if fellback:
log.warning(u'fell back to default replacements when naming file')
if fragment:
return subpath

View file

@ -545,6 +545,63 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH):
return os.path.join(*out)
def _legalize_stage(path, replacements, length, extension, fragment):
""" Performs sanitization of the path, then encodes for the filesystem,
appends the extension and truncates. Returns the path (unicode if fragment
is set, otherwise bytes), and whether truncation was required.
"""
# Perform an initial sanitization including user replacements.
path = sanitize_path(path, replacements)
# Encode for the filesystem.
if not fragment:
path = bytestring_path(path)
# Preserve extension.
path += extension.lower()
# Truncate too-long components.
pre_truncate_path = path
path = truncate_path(path, length)
return path, path != pre_truncate_path
def legalize_path(path, replacements, length, extension, fragment):
""" Perform up to three calls to _legalize_stage, to generate a stable
output path, taking user replacements into consideration if possible. The
limited number of iterations avoids the possibility of an infinite loop of
sanitization and truncation operations, which could be caused by some user
replacements.
"""
if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')
first_stage_path, _ = _legalize_stage(
path, replacements, length, extension, fragment
)
# Convert back to Unicode with extension removed.
first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0]
# Re-sanitize following truncation (including user replacements).
second_stage_path, retruncated = _legalize_stage(
first_stage_path, replacements, length, extension, fragment
)
# If the path was once again truncated, discard user replacements.
if retruncated:
second_stage_path, _ = _legalize_stage(
first_stage_path, None, length, extension, fragment
)
return second_stage_path, True
else:
return second_stage_path, False
def str2bool(value):
"""Returns a boolean reflecting a human-entered string."""
return value.lower() in ('yes', '1', 'true', 't', 'y')

View file

@ -119,6 +119,11 @@ compatibility with Windows-influenced network filesystems like Samba).
Trailing dots and trailing whitespace, which can cause problems on Windows
clients, are also removed.
When replacements other than the defaults are used, it is possible that they
will increase the length of the path. In the scenario where this leads to a
conflict with the maximum filename length, the default replacements will be
used to resolve the conflict and beets will display a warning.
Note that paths might contain special characters such as typographical
quotes (``“”``). With the configuration above, those will not be
replaced as they don't match the typewriter quote (``"``). To also strip these

View file

@ -452,8 +452,7 @@ class DestinationTest(_common.TestCase):
self.assertEqual(self.i.destination(),
np('base/one/_.mp3'))
@unittest.skip('unimplemented: #496')
def test_truncation_does_not_conflict_with_replacement(self):
def test_legalize_path_one_for_one_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with a Z.
self.lib.replacements = [
@ -466,7 +465,23 @@ class DestinationTest(_common.TestCase):
# The final path should reflect the replacement.
dest = self.i.destination()
self.assertTrue('XZ' in dest)
self.assertEqual(dest[-2:], 'XZ')
def test_legalize_path_one_for_many_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with four Zs.
self.lib.replacements = [
(re.compile(r'X$'), u'ZZZZ'),
]
# Construct an item whose untruncated path ends with a Y but whose
# truncated version ends with an X.
self.i.title = 'X' * 300 + 'Y'
# The final path should ignore the user replacement and create a path
# of the correct length, containing Xs.
dest = self.i.destination()
self.assertEqual(dest[-2:], 'XX')
class ItemFormattedMappingTest(_common.LibTestCase):