mirror of
https://github.com/beetbox/beets.git
synced 2026-01-30 12:02:41 +01:00
revamp default character substitutions
There's no longer a distinction between Unix and Windows substitutions. Enough users reported problems with Windows-forbidden characters on Samba shares that it seems appropriate to make all filenames Windows-safe, even on Unix. Users who really want those additional characters (<>:"?*|\) can re-enable them via the "replace" option. Nobody has complained about beets being *too* conservative. This also adds sanitization of control characters, which is an all-around good idea, and the substitution now runs in the Unicode (rather than byte) domain.
This commit is contained in:
parent
2d20e3582b
commit
ffa2402ff4
8 changed files with 67 additions and 60 deletions
|
|
@ -880,18 +880,18 @@ class Library(BaseLibrary):
|
|||
funcs.update(plugins.template_funcs())
|
||||
subpath = subpath_tmpl.substitute(mapping, funcs)
|
||||
|
||||
# Encode for the filesystem, dropping unencodable characters.
|
||||
# Prepare path for output: normalize Unicode characters.
|
||||
if platform == 'darwin':
|
||||
subpath = unicodedata.normalize('NFD', subpath)
|
||||
else:
|
||||
subpath = unicodedata.normalize('NFC', subpath)
|
||||
# Truncate components and remove forbidden characters.
|
||||
subpath = util.sanitize_path(subpath, pathmod, self.replacements)
|
||||
# Encode for the filesystem, dropping unencodable characters.
|
||||
if isinstance(subpath, unicode) and not fragment:
|
||||
encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
|
||||
subpath = subpath.encode(encoding, 'replace')
|
||||
|
||||
# Truncate components and remove forbidden characters.
|
||||
subpath = util.sanitize_path(subpath, pathmod, self.replacements)
|
||||
|
||||
# Preserve extension.
|
||||
_, extension = pathmod.splitext(item.path)
|
||||
subpath += extension.lower()
|
||||
|
|
|
|||
|
|
@ -428,6 +428,7 @@ def _get_replacements(config):
|
|||
repl_string = config_val(config, 'beets', 'replace', None)
|
||||
if not repl_string:
|
||||
return
|
||||
repl_string = repl_string.decode('utf8')
|
||||
|
||||
parts = repl_string.strip().split()
|
||||
if not parts:
|
||||
|
|
|
|||
|
|
@ -277,33 +277,33 @@ def unique_path(path):
|
|||
if not os.path.exists(new_path):
|
||||
return new_path
|
||||
|
||||
# Note: POSIX actually supports \ and : -- I just think they're
|
||||
# a pain. And ? has caused problems for some.
|
||||
# Note: The Windows "reserved characters" are, of course, allowed on
|
||||
# Unix. They are forbidden here because they cause problems on Samba
|
||||
# shares, which are sufficiently common as to cause frequent problems.
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx
|
||||
CHAR_REPLACE = [
|
||||
(re.compile(r'[\\/\?"]|^\.'), '_'),
|
||||
(re.compile(r':'), '-'),
|
||||
]
|
||||
CHAR_REPLACE_WINDOWS = [
|
||||
(re.compile(r'["\*<>\|]|^\.|\.$|\s+$'), '_'),
|
||||
(re.compile(ur'[\\/]'), u'_'), # / and \ -- forbidden everywhere.
|
||||
(re.compile(ur'^\.'), u'_'), # Leading dot (hidden files on Unix).
|
||||
(re.compile(ur'[\x00-\x1f]'), u''), # Control characters.
|
||||
(re.compile(ur'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters".
|
||||
(re.compile(ur'\.$'), u'_'), # Trailing dots.
|
||||
(re.compile(ur'\s+$'), u''), # Trailing whitespace.
|
||||
]
|
||||
def sanitize_path(path, pathmod=None, replacements=None):
|
||||
"""Takes a path and makes sure that it is legal. Returns a new path.
|
||||
Only works with fragments; won't work reliably on Windows when a
|
||||
path begins with a drive letter. Path separators (including altsep!)
|
||||
should already be cleaned from the path components. If replacements
|
||||
is specified, it is used *instead* of the default set of
|
||||
replacements for the platform; it must be a list of (compiled regex,
|
||||
replacement string) pairs.
|
||||
"""Takes a path (as a Unicode string) and makes sure that it is
|
||||
legal. Returns a new path. Only works with fragments; won't work
|
||||
reliably on Windows when a path begins with a drive letter. Path
|
||||
separators (including altsep!) should already be cleaned from the
|
||||
path components. If replacements is specified, it is used *instead*
|
||||
of the default set of replacements for the platform; it must be a
|
||||
list of (compiled regex, replacement string) pairs.
|
||||
"""
|
||||
pathmod = pathmod or os.path
|
||||
windows = pathmod.__name__ == 'ntpath'
|
||||
|
||||
# Choose the appropriate replacements.
|
||||
if not replacements:
|
||||
replacements = list(CHAR_REPLACE)
|
||||
if windows:
|
||||
replacements += CHAR_REPLACE_WINDOWS
|
||||
|
||||
|
||||
comps = components(path, pathmod)
|
||||
if not comps:
|
||||
return ''
|
||||
|
|
@ -311,10 +311,10 @@ def sanitize_path(path, pathmod=None, replacements=None):
|
|||
# Replace special characters.
|
||||
for regex, repl in replacements:
|
||||
comp = regex.sub(repl, comp)
|
||||
|
||||
|
||||
# Truncate each component.
|
||||
comp = comp[:MAX_FILENAME_LENGTH]
|
||||
|
||||
|
||||
comps[i] = comp
|
||||
return pathmod.join(*comps)
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,11 @@ Changelog
|
|||
file for easy importing to other systems. Thanks to Fabrice Laporte.
|
||||
* When the autotagger fails to find a match, it now displays the number of
|
||||
tracks on the album (to help you guess what might be going wrong).
|
||||
* The default filename character substitutions were changed to be more
|
||||
conservative. The Windows "reserved characters" are substituted by default
|
||||
even on Unix platforms (this causes less surprise when using Samba shares to
|
||||
store music). To customize your character substitutions, see :ref:`the replace
|
||||
config option <replace>`.
|
||||
* :doc:`/plugins/bpd`: Use Gstreamer's ``playbin2`` element instead of the
|
||||
deprecated ``playbin``.
|
||||
* Filenames are normalized with Unicode Normal Form D (NFD) on Mac OS X and NFC
|
||||
|
|
|
|||
|
|
@ -74,11 +74,13 @@ section header:
|
|||
to be ignored when importing. Defaults to ``.* *~`` (i.e., ignore
|
||||
Unix-style hidden files and backup files).
|
||||
|
||||
.. _replace:
|
||||
|
||||
``replace``
|
||||
A set of regular expression/replacement pairs to be applied to all filenames
|
||||
created by beets. Typically, these replacements are used to avoid confusing
|
||||
problems or errors with the filesystem (for example, leading ``.``
|
||||
characters are replaced on Unix and the ``*<>|`` characters are removed on
|
||||
characters are replaced on Unix and trailing whitespace is removed on
|
||||
Windows). To override these substitutions, specify a sequence of
|
||||
whitespace-separated terms; the first term is a regular expression and the
|
||||
second is a string that should replace anything matching that regex. For
|
||||
|
|
@ -87,19 +89,22 @@ section header:
|
|||
|
||||
If you do change this value, be certain that you include at least enough
|
||||
substitutions to avoid causing errors on your operating system. Here are
|
||||
some recommended base replacements for Unix-like OSes::
|
||||
the default substitutions used by beets, which are sufficient to avoid
|
||||
unexpected behavior on all popular platforms::
|
||||
|
||||
replace = [\\/\?"]|^\. _
|
||||
: -
|
||||
replace = [\\/] _
|
||||
^\. _
|
||||
[\x00-\x1f] _
|
||||
[<>:"\?\*\|] _
|
||||
\.$ _
|
||||
\s+$ <strip>
|
||||
|
||||
And, on Windows::
|
||||
|
||||
replace = [\\/\?"]|^\. _
|
||||
["\*<>\|]|^\.|\.$|\s+$ _
|
||||
: -
|
||||
|
||||
Note that the above examples are, in fact, the default substitutions used by
|
||||
beets.
|
||||
These substitutions remove forward and back slashes, leading dots, and
|
||||
control characters—all of which is a good idea on any OS. The fourth line
|
||||
removes the Windows "reserved characters" (useful even on Unix for
|
||||
compatibility with Windows-influenced network filesystems like Samba).
|
||||
Trailing dots and trailing whitespace, which can cause problems on Windows
|
||||
clients, are also removed.
|
||||
|
||||
To replace space characters, use the ``\s`` (whitespace) entity::
|
||||
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -232,15 +232,15 @@ class DestinationTest(unittest.TestCase):
|
|||
self.assertFalse('two / three' in p)
|
||||
|
||||
def test_sanitize_unix_replaces_leading_dot(self):
|
||||
p = util.sanitize_path('one/.two/three', posixpath)
|
||||
p = util.sanitize_path(u'one/.two/three', posixpath)
|
||||
self.assertFalse('.' in p)
|
||||
|
||||
def test_sanitize_windows_replaces_trailing_dot(self):
|
||||
p = util.sanitize_path('one/two./three', ntpath)
|
||||
p = util.sanitize_path(u'one/two./three', ntpath)
|
||||
self.assertFalse('.' in p)
|
||||
|
||||
def test_sanitize_windows_replaces_illegal_chars(self):
|
||||
p = util.sanitize_path(':*?"<>|', ntpath)
|
||||
p = util.sanitize_path(u':*?"<>|', ntpath)
|
||||
self.assertFalse(':' in p)
|
||||
self.assertFalse('*' in p)
|
||||
self.assertFalse('?' in p)
|
||||
|
|
@ -249,10 +249,6 @@ class DestinationTest(unittest.TestCase):
|
|||
self.assertFalse('>' in p)
|
||||
self.assertFalse('|' in p)
|
||||
|
||||
def test_sanitize_replaces_colon_with_dash(self):
|
||||
p = util.sanitize_path(u':', posixpath)
|
||||
self.assertEqual(p, u'-')
|
||||
|
||||
def test_path_with_format(self):
|
||||
self.lib.path_formats = [('default', '$artist/$album ($format)')]
|
||||
p = self.lib.destination(self.i)
|
||||
|
|
@ -341,7 +337,7 @@ class DestinationTest(unittest.TestCase):
|
|||
self.assertEqual(path, outpath)
|
||||
|
||||
def test_sanitize_windows_replaces_trailing_space(self):
|
||||
p = util.sanitize_path('one/two /three', ntpath)
|
||||
p = util.sanitize_path(u'one/two /three', ntpath)
|
||||
self.assertFalse(' ' in p)
|
||||
|
||||
def test_component_sanitize_replaces_separators(self):
|
||||
|
|
@ -390,20 +386,20 @@ class DestinationTest(unittest.TestCase):
|
|||
self.assertEqual(p.rsplit(os.path.sep, 1)[1], 'something')
|
||||
|
||||
def test_sanitize_path_works_on_empty_string(self):
|
||||
p = util.sanitize_path('', posixpath)
|
||||
self.assertEqual(p, '')
|
||||
p = util.sanitize_path(u'', posixpath)
|
||||
self.assertEqual(p, u'')
|
||||
|
||||
def test_sanitize_with_custom_replace_overrides_built_in_sub(self):
|
||||
p = util.sanitize_path('a/.?/b', posixpath, [
|
||||
(re.compile(r'foo'), 'bar'),
|
||||
p = util.sanitize_path(u'a/.?/b', posixpath, [
|
||||
(re.compile(ur'foo'), u'bar'),
|
||||
])
|
||||
self.assertEqual(p, 'a/.?/b')
|
||||
self.assertEqual(p, u'a/.?/b')
|
||||
|
||||
def test_sanitize_with_custom_replace_adds_replacements(self):
|
||||
p = util.sanitize_path('foo/bar', posixpath, [
|
||||
(re.compile(r'foo'), 'bar'),
|
||||
p = util.sanitize_path(u'foo/bar', posixpath, [
|
||||
(re.compile(ur'foo'), u'bar'),
|
||||
])
|
||||
self.assertEqual(p, 'bar/bar')
|
||||
self.assertEqual(p, u'bar/bar')
|
||||
|
||||
def test_unicode_normalized_nfd_on_mac(self):
|
||||
instr = unicodedata.normalize('NFC', u'caf\xe9')
|
||||
|
|
@ -822,14 +818,14 @@ class PathStringTest(unittest.TestCase):
|
|||
self.assertEqual(path, alb.artpath)
|
||||
|
||||
def test_sanitize_path_with_special_chars(self):
|
||||
path = 'b\xe1r?'
|
||||
path = u'b\xe1r?'
|
||||
new_path = util.sanitize_path(path)
|
||||
self.assert_(new_path.startswith('b\xe1r'))
|
||||
self.assert_(new_path.startswith(u'b\xe1r'))
|
||||
|
||||
def test_sanitize_path_returns_bytestring(self):
|
||||
path = 'b\xe1r?'
|
||||
def test_sanitize_path_returns_unicode(self):
|
||||
path = u'b\xe1r?'
|
||||
new_path = util.sanitize_path(path)
|
||||
self.assert_(isinstance(new_path, str))
|
||||
self.assert_(isinstance(new_path, unicode))
|
||||
|
||||
def test_unicode_artpath_becomes_bytestring(self):
|
||||
alb = self.lib.add_album([self.i])
|
||||
|
|
|
|||
|
|
@ -532,7 +532,7 @@ class ConfigTest(unittest.TestCase):
|
|||
def test_replacements_parsed(self):
|
||||
def func(lib, config, opts, args):
|
||||
replacements = lib.replacements
|
||||
self.assertEqual(replacements, [(re.compile(r'[xy]'), 'z')])
|
||||
self.assertEqual(replacements, [(re.compile(ur'[xy]'), u'z')])
|
||||
self._run_main([], textwrap.dedent("""
|
||||
[beets]
|
||||
replace=[xy] z"""), func)
|
||||
|
|
@ -549,8 +549,8 @@ class ConfigTest(unittest.TestCase):
|
|||
def func(lib, config, opts, args):
|
||||
replacements = lib.replacements
|
||||
self.assertEqual(replacements, [
|
||||
(re.compile(r'[xy]'), 'z'),
|
||||
(re.compile(r'foo'), 'bar'),
|
||||
(re.compile(ur'[xy]'), u'z'),
|
||||
(re.compile(ur'foo'), u'bar'),
|
||||
])
|
||||
self._run_main([], textwrap.dedent("""
|
||||
[beets]
|
||||
|
|
|
|||
Loading…
Reference in a new issue