def asciify_path(path, sep_replace):
    """Decode all unicode characters in a path into ASCII equivalents.

    Substitutions are provided by the unidecode module. Path separators
    that are already present in the input are preserved; separators that
    appear only as a result of the transliteration of a single component
    (for example, a character that unidecode expands to text containing
    a slash) are replaced with `sep_replace` so they cannot split the
    component into extra directories.

    Keyword arguments:
    path -- The path to be asciified.
    sep_replace -- the string to be used to replace extraneous path
        separators.

    Returns the asciified path, joined with os.sep.
    """
    # if this platform has an os.altsep, change it to os.sep.
    if os.altsep:
        path = path.replace(os.altsep, os.sep)
    path_components = path.split(os.sep)
    for index, item in enumerate(path_components):
        # Transliterate once, then strip *both* kinds of separator from
        # that single result. Re-running unidecode() for the altsep pass
        # would throw away the os.sep replacement made just before it.
        component = unidecode(item).replace(os.sep, sep_replace)
        if os.altsep:
            component = component.replace(os.altsep, sep_replace)
        path_components[index] = component
    return os.sep.join(path_components)
def test_asciify_character_expanding_to_slash(self):
    """With asciify_paths on, U+00A2 and U+00BD in a title must not
    introduce new directory levels in the destination.

    (Method of DestinationTest.) The expected name shows the generated
    separators replaced by underscores -- presumably the default
    path_sep_replace value; confirm against the config defaults.
    """
    config['asciify_paths'] = True
    self.lib.directory = b'lib'
    self.lib.path_formats = [(u'default', u'$title')]
    self.i.title = u'ab\xa2\xbdd'
    actual = self.i.destination()
    expected = np('lib/abC_1_2d')
    self.assertEqual(actual, expected)


def test_asciify_variable(self):
    """The %asciify{} template function must give the same substituted
    result for U+00A2 and U+00BD as the asciify_paths setting.

    (Method of DestinationFunctionTest.)
    """
    template = u'%asciify{ab\xa2\xbdd}'
    self._setf(template)
    self._assert_dest(b'/base/abC_1_2d')