* break the asciify-paths code out into the util.asciify_paths function

* make asciifying handle both os.sep and os.altsep (testing needed, as I
  don't have a Windows box handy)
* make %asciify{} use the same code path as the asciify_paths goop.
* added a discrete test to %asciify{} so my life acts as a warning to
  others
* changelog note now with 80% fewer antihistamine-induced run-on sentences
This commit is contained in:
xarph 2016-12-07 20:59:33 -08:00
parent 7546ea8676
commit 53a677b8fd
4 changed files with 36 additions and 11 deletions

View file

@ -23,7 +23,6 @@ import unicodedata
import time
import re
import six
from unidecode import unidecode
from beets import logging
from beets.mediafile import MediaFile, UnreadableFileError
@ -824,13 +823,10 @@ class Item(LibModel):
subpath = unicodedata.normalize('NFC', subpath)
if beets.config['asciify_paths']:
path_components = subpath.split(os.path.sep)
sep_replace = beets.config['path_sep_replace'].get()
for index, item in enumerate(path_components):
path_components[index] = unidecode(item).replace(
os.path.sep, sep_replace
)
subpath = os.path.sep.join(path_components)
subpath = util.asciify_path(
subpath,
beets.config['path_sep_replace'].as_str()
)
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
@ -1082,7 +1078,10 @@ class Album(LibModel):
beets.config['art_filename'].as_str())
subpath = self.evaluate_template(filename_tmpl, True)
if beets.config['asciify_paths']:
subpath = unidecode(subpath)
subpath = util.asciify_path(
subpath,
beets.config['path_sep_replace'].as_str()
)
subpath = util.sanitize_path(subpath,
replacements=self._db.replacements)
subpath = bytestring_path(subpath)
@ -1434,7 +1433,7 @@ class DefaultTemplateFunctions(object):
def tmpl_asciify(s):
"""Translate non-ASCII characters to their ASCII equivalents.
"""
return unidecode(s)
return util.asciify_path(s, beets.config['path_sep_replace'].as_str())
@staticmethod
def tmpl_time(s, fmt):

View file

@ -29,6 +29,7 @@ import platform
import shlex
from beets.util import hidden
import six
from unidecode import unidecode
MAX_FILENAME_LENGTH = 200
@ -941,3 +942,22 @@ def raw_seconds_short(string):
raise ValueError(u'String not in M:SS format')
minutes, seconds = map(int, match.groups())
return float(minutes * 60 + seconds)
def asciify_path(path, sep_replace):
    """Decodes all unicode characters in a path into ASCII equivalents.

    Substitutions are provided by the unidecode module. Path separators in the
    input are preserved; separators that only appear as a result of the
    ASCII conversion are replaced with `sep_replace`.

    Keyword arguments:
    path -- The path to be asciified.
    sep_replace -- the string to be used to replace extraneous path separators.
    """
    # if this platform has an os.altsep, change it to os.sep.
    # str.replace returns a new string, so the result has to be rebound;
    # otherwise the normalisation silently does nothing.
    if os.altsep:
        path = path.replace(os.altsep, os.sep)
    path_components = path.split(os.sep)
    for index, item in enumerate(path_components):
        # Any separator found inside a component after conversion was
        # generated by unidecode, not by the caller, so neutralise it.
        ascii_item = unidecode(item).replace(os.sep, sep_replace)
        if os.altsep:
            # The alternate separator would also be treated as a directory
            # boundary by the OS, so it must be replaced as well.
            ascii_item = ascii_item.replace(os.altsep, sep_replace)
        path_components[index] = ascii_item
    return os.sep.join(path_components)

View file

@ -7,7 +7,9 @@ Changelog
Fixes:
* :doc:`/plugins/badfiles`: Fix a crash on non-ASCII filenames. :bug:`2299`
* With :ref:`asciify-paths` set to "yes", some unicode tags could decode in such a way that erroneous directories were created mid-filename. This has been fixed.
* The ``%asciify{}`` path formatting function and the :ref:`asciify-paths`
setting now properly substitute path separators generated by converting some
Unicode characters into ASCII, such as ½ and ¢.
1.4.1 (November 25, 2016)

View file

@ -589,6 +589,10 @@ class DestinationFunctionTest(_common.TestCase, PathFormattingMixin):
self._setf(u'%title{$title}')
self._assert_dest(b'/base/The Title')
def test_asciify_variable(self):
    """%asciify{} applied to text containing U+00A2 and U+00BD must yield
    the destination b'/base/abC_1_2d'.
    """
    path_format = u'%asciify{ab\xa2\xbdd}'
    expected_dest = b'/base/abC_1_2d'
    self._setf(path_format)
    self._assert_dest(expected_dest)
def test_left_variable(self):
self._setf(u'%left{$title, 3}')
self._assert_dest(b'/base/the')