diff --git a/beets/library.py b/beets/library.py index dcc7b3ba0..ba05b69c7 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1126,6 +1126,13 @@ class Library(BaseLibrary): # Encode for the filesystem, dropping unencodable characters. if isinstance(subpath, unicode) and not fragment: encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + if encoding == 'mbcs': + # On Windows, a broken encoding known to Python as + # "MBCS" is used for the filesystem. However, we only + # use the Unicode API for Windows paths, so the encoding + # is actually immaterial and we can avoid dealing with + # this nastiness. We arbitrarily choose UTF-8. + encoding = 'utf8' subpath = subpath.encode(encoding, 'replace') # Preserve extension. diff --git a/beets/util/__init__.py b/beets/util/__init__.py index d93f9f8a6..ecd66a58f 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -284,12 +284,16 @@ def syspath(path, pathmod=None): return path if not isinstance(path, unicode): - # Try to decode with default encodings, but fall back to UTF8. - encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + # Beets currently represents Windows paths internally with UTF-8 + # arbitrarily. But earlier versions used MBCS because it is + # reported as the FS encoding by Windows. Try both. try: - path = path.decode(encoding, 'replace') + path = path.decode('utf8') except UnicodeError: - path = path.decode('utf8', 'replace') + # The encoding should always be MBCS, Windows' broken + # Unicode representation. + encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + path = path.decode(encoding, 'replace') # Add the magic prefix if it isn't already there if not path.startswith(u'\\\\?\\'): diff --git a/docs/changelog.rst b/docs/changelog.rst index 1eabcf315..42bcebe60 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -86,6 +86,11 @@ art for your music, enable this plugin after upgrading to beets 1.0b15. already at its destination. 
* Fix Unicode values in the ``replace`` config option (thanks to Jakob Borg). * Use a nicer error message when input is requested but stdin is closed. +* Fix errors on Windows for certain Unicode characters that can't be represented + in the MBCS encoding. This required a change to the way that paths are + represented in the database on Windows; if you find that beets' paths are out + of sync with your filesystem with this release, delete and recreate your + database with ``beet import -AWC /path/to/music``. .. _artist credits: http://wiki.musicbrainz.org/Artist_Credit diff --git a/test/test_db.py b/test/test_db.py index beafaa6d1..49a1dbef5 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -21,6 +21,7 @@ import posixpath import shutil import re import unicodedata +import sys import _common from _common import unittest @@ -428,6 +429,19 @@ class DestinationTest(unittest.TestCase): dest = self.lib.destination(self.i, platform='linux2', fragment=True) self.assertEqual(dest, unicodedata.normalize('NFC', instr)) + def test_non_mbcs_characters_on_windows(self): + oldfunc = sys.getfilesystemencoding + sys.getfilesystemencoding = lambda: 'mbcs' + try: + self.i.title = u'h\u0259d' + self.lib.path_formats = [('default', '$title')] + p = self.lib.destination(self.i) + self.assertFalse('?' in p) + # We use UTF-8 to encode Windows paths now. + self.assertTrue(u'h\u0259d'.encode('utf8') in p) + finally: + sys.getfilesystemencoding = oldfunc + class PathFormattingMixin(object): """Utilities for testing path formatting.""" def _setf(self, fmt):