Merge pull request #2303 from tweitzel/master

Decompose unicode paths and unidecode each component. Fixes #1087 #1159 #1541 #2286
This commit is contained in:
Adrian Sampson 2016-12-08 21:35:01 -05:00 committed by GitHub
commit 4bbb05b009
4 changed files with 48 additions and 4 deletions

View file

@ -23,7 +23,6 @@ import unicodedata
import time
import re
import six
from unidecode import unidecode
from beets import logging
from beets.mediafile import MediaFile, UnreadableFileError
@ -824,7 +823,10 @@ class Item(LibModel):
subpath = unicodedata.normalize('NFC', subpath)
if beets.config['asciify_paths']:
subpath = unidecode(subpath)
subpath = util.asciify_path(
subpath,
beets.config['path_sep_replace'].as_str()
)
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
@ -1076,7 +1078,10 @@ class Album(LibModel):
beets.config['art_filename'].as_str())
subpath = self.evaluate_template(filename_tmpl, True)
if beets.config['asciify_paths']:
subpath = unidecode(subpath)
subpath = util.asciify_path(
subpath,
beets.config['path_sep_replace'].as_str()
)
subpath = util.sanitize_path(subpath,
replacements=self._db.replacements)
subpath = bytestring_path(subpath)
@ -1428,7 +1433,7 @@ class DefaultTemplateFunctions(object):
def tmpl_asciify(s):
    """Translate non-ASCII characters to their ASCII equivalents.

    The work is delegated to ``util.asciify_path`` so that any path
    separator produced by the transliteration is replaced with the
    configured ``path_sep_replace`` string rather than being left in the
    result.
    """
    # A bare ``unidecode(s)`` must not be returned here: it would leave
    # separators generated by the transliteration in place.
    return util.asciify_path(s, beets.config['path_sep_replace'].as_str())
@staticmethod
def tmpl_time(s, fmt):

View file

@ -29,6 +29,7 @@ import platform
import shlex
from beets.util import hidden
import six
from unidecode import unidecode
MAX_FILENAME_LENGTH = 200
@ -941,3 +942,27 @@ def raw_seconds_short(string):
raise ValueError(u'String not in M:SS format')
minutes, seconds = map(int, match.groups())
return float(minutes * 60 + seconds)
def asciify_path(path, sep_replace):
    """Decodes all unicode characters in a path into ASCII equivalents.

    Substitutions are provided by the unidecode module. Path separators in the
    input are preserved.

    Keyword arguments:
    path -- The path to be asciified.
    sep_replace -- the string to be used to replace extraneous path separators.

    Returns the asciified path, with its components joined by ``os.sep``.
    """
    # if this platform has an os.altsep, change it to os.sep.
    if os.altsep:
        path = path.replace(os.altsep, os.sep)
    path_components = path.split(os.sep)
    for index, item in enumerate(path_components):
        # Transliterate once, then strip every separator the transliteration
        # introduced. The replacements are chained on the same string so the
        # altsep pass cannot discard the result of the sep pass.
        component = unidecode(item).replace(os.sep, sep_replace)
        if os.altsep:
            component = component.replace(os.altsep, sep_replace)
        path_components[index] = component
    return os.sep.join(path_components)

View file

@ -7,6 +7,9 @@ Changelog
Fixes:
* :doc:`/plugins/badfiles`: Fix a crash on non-ASCII filenames. :bug:`2299`
* The ``%asciify{}`` path formatting function and the :ref:`asciify-paths`
setting now properly substitute path separators produced when some Unicode
characters, such as ½ and ¢, are converted to ASCII.
1.4.1 (November 25, 2016)

View file

@ -422,6 +422,13 @@ class DestinationTest(_common.TestCase):
self.i.title = u'\u201c\u00f6\u2014\u00cf\u201d'
self.assertEqual(self.i.destination(), np('lib/qo--Iq'))
def test_asciify_character_expanding_to_slash(self):
    """With ``asciify_paths`` on, the title ``ab\\xa2\\xbdd`` is expected
    to yield the destination ``lib/abC_1_2d``.
    """
    config['asciify_paths'] = True
    self.lib.directory = b'lib'
    default_format = (u'default', u'$title')
    self.lib.path_formats = [default_format]
    self.i.title = u'ab\xa2\xbdd'
    actual = self.i.destination()
    expected = np('lib/abC_1_2d')
    self.assertEqual(actual, expected)
def test_destination_with_replacements(self):
self.lib.directory = b'base'
self.lib.replacements = [(re.compile(r'a'), u'e')]
@ -582,6 +589,10 @@ class DestinationFunctionTest(_common.TestCase, PathFormattingMixin):
self._setf(u'%title{$title}')
self._assert_dest(b'/base/The Title')
def test_asciify_variable(self):
    """``%asciify{ab\\xa2\\xbdd}`` is expected to produce the destination
    ``/base/abC_1_2d``.
    """
    template = u'%asciify{ab\xa2\xbdd}'
    self._setf(template)
    expected_dest = b'/base/abC_1_2d'
    self._assert_dest(expected_dest)
def test_left_variable(self):
self._setf(u'%left{$title, 3}')
self._assert_dest(b'/base/the')