Rewrote path legalization code to be two module functions rather than class methods. Also made algorithm more predictable, and added an extra test.

This commit is contained in:
Ben Ockmore 2015-07-07 01:17:12 +01:00
parent d07c8fde69
commit de17d000bd
3 changed files with 79 additions and 69 deletions

View file

@ -30,7 +30,7 @@ from beets import logging
from beets.mediafile import MediaFile, MutagenError, UnreadableFileError
from beets import plugins
from beets import util
from beets.util import bytestring_path, syspath, normpath, samefile
from beets.util import bytestring_path, syspath, normpath, samefile, legalize_path
from beets.util.functemplate import Template
from beets import dbcore
from beets.dbcore import types
@ -742,71 +742,6 @@ class Item(LibModel):
# Templating.
def _legalize_partial(self, path, fragment, replacements):
""" Perform a partial legalization of the path (ie. a single
sanitization and truncation). Outputs unicode if fragment is set,
otherwise bytes.
"""
# Truncate components and remove forbidden characters.
path = util.sanitize_path(path, replacements)
# Encode for the filesystem.
if not fragment:
path = bytestring_path(path)
# Preserve extension.
_, extension = os.path.splitext(self.path)
if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')
path += extension.lower()
# Truncate too-long components.
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
# When zero, try to determine from filesystem.
maxlen = util.max_filename_length(self._db.directory)
path = util.truncate_path(path, maxlen)
return path
def _legalize_path(self, path, fragment):
""" Perform several iterations of _legalize_partial, to generate a
stable, optimal output path. This is necessary for cases where
truncation produces unclean paths (eg. trailing space).
"""
# Create a list of path candidates
path_candidates = [b'']
replacements = self._db.replacements
# Perform an initial pass
path = self._legalize_partial(path, fragment, replacements)
while path != path_candidates[-1]:
# This will keep sanitizing the path until it's stable, or an
# infinite loop appears
while path not in path_candidates:
path_candidates.append(path)
# Convert back to Unicode with extension removed
print(util.displayable_path(path))
path = os.path.splitext(util.displayable_path(path))[0]
# Run next pass
path = self._legalize_partial(path, fragment, replacements)
# If an infinite loop occurred, adjust replacements to avoid it
if path != path_candidates[-1]:
replacements = dict((k, u'_') for k, v in replacements)
# If there's a rule to match a single underscore, set the
# target to a blank string.
if '_' in replacements:
replacements['_'] = ''
return path
def destination(self, fragment=False, basedir=None, platform=None,
path_formats=None):
"""Returns the path in the library directory designated for the
@ -855,7 +790,13 @@ class Item(LibModel):
if beets.config['asciify_paths']:
subpath = unidecode(subpath)
subpath = self._legalize_path(subpath, fragment)
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
# When zero, try to determine from filesystem.
maxlen = util.max_filename_length(self._db.directory)
subpath = util.legalize_path(subpath, self._db.replacements, maxlen,
os.path.splitext(self.path)[1], fragment)
if fragment:
return subpath

View file

@ -545,6 +545,59 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH):
return os.path.join(*out)
def _legalize_stage(path, replacements, length, extension, fragment):
""" Perform a signle stage of legalization of the path (ie. a single
sanitization and truncation). Returns the path (unicode if fragment is set,
otherwise bytes), and whether truncation was required.
"""
# Perform an initial sanitization including user replacements.
path = sanitize_path(path, replacements)
# Encode for the filesystem.
if not fragment:
path = bytestring_path(path)
# Preserve extension.
path += extension.lower()
# Truncate too-long components.
pre_truncate_path = path
path = truncate_path(path, length)
return (path, path != pre_truncate_path)
def legalize_path(path, replacements, length, extension, fragment):
""" Perform up to three calls to _legalize_stage, to generate a stable
output path, taking user replacements into consideration if possible. This
additional complexity is necessary for cases where truncation produces
unclean paths (eg. trailing space).
"""
if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')
first_stage_path =\
_legalize_stage(path, replacements, length, extension, fragment)[0]
# Convert back to Unicode with extension removed.
first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0]
# Re-sanitize following truncation (including user replacements).
second_stage_path, retruncated = _legalize_stage(
first_stage_path, replacements, length, extension, fragment
)
# If the path was once again truncated, discard user replacements.
if retruncated:
second_stage_path = _legalize_stage(
first_stage_path, None, length, extension, fragment
)[0]
return second_stage_path
def str2bool(value):
"""Returns a boolean reflecting a human-entered string."""
return value.lower() in ('yes', '1', 'true', 't', 'y')

View file

@ -452,7 +452,7 @@ class DestinationTest(_common.TestCase):
self.assertEqual(self.i.destination(),
np('base/one/_.mp3'))
def test_truncation_does_not_conflict_with_replacement(self):
def test_legalize_path_one_for_one_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with a Z.
self.lib.replacements = [
@ -465,7 +465,23 @@ class DestinationTest(_common.TestCase):
# The final path should reflect the replacement.
dest = self.i.destination()
self.assertTrue('XZ' in dest)
self.assertEqual(dest[-2:], 'XZ')
def test_legalize_path_one_for_many_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with four Zs.
self.lib.replacements = [
(re.compile(r'X$'), u'ZZZZ'),
]
# Construct an item whose untruncated path ends with a Y but whose
# truncated version ends with an X.
self.i.title = 'X' * 300 + 'Y'
# The final path should ignore the user replacement and create a path
# of the correct length, containing Xs.
dest = self.i.destination()
self.assertEqual(dest[-2:], 'XX')
class ItemFormattedMappingTest(_common.LibTestCase):