From 16f207e927094bc68647d05ddb5f1c609b132041 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Sat, 20 Oct 2012 23:49:43 -0700 Subject: [PATCH] make syspath/bytestring_path roundtrip on Windows This is an alternative to #58 that makes bytestring_path perform more like the inverse of syspath on Windows. This way, we can convert a path with syspath, operate on it, and then bring it back to the internal representation without data loss. This involves looking for the magic prefix on the Unicode string and removing it before encoding to the internal (UTF-8) representation. --- beets/util/__init__.py | 20 ++++++++++++++++---- test/test_db.py | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 5e7cb8e38..5a36c31bf 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -24,6 +24,7 @@ from collections import defaultdict import traceback MAX_FILENAME_LENGTH = 200 +WINDOWS_MAGIC_PREFIX = u'\\\\?\\' class HumanReadableException(Exception): """An Exception that can include a human-readable error message to @@ -108,7 +109,9 @@ def normpath(path): """Provide the canonical form of the path suitable for storing in the database. """ - return os.path.normpath(os.path.abspath(os.path.expanduser(path))) + path = syspath(path) + path = os.path.normpath(os.path.abspath(os.path.expanduser(path))) + return bytestring_path(path) def ancestry(path, pathmod=None): """Return a list consisting of path's parent directory, its @@ -256,14 +259,23 @@ def _fsencoding(): encoding = 'utf8' return encoding -def bytestring_path(path): +def bytestring_path(path, pathmod=None): """Given a path, which is either a str or a unicode, returns a str path (ensuring that we never deal with Unicode pathnames). """ + pathmod = pathmod or os.path + windows = pathmod.__name__ == 'ntpath' + # Pass through bytestrings. 
if isinstance(path, str): return path + # On Windows, remove the magic prefix added by `syspath`. This makes + # ``bytestring_path(syspath(X)) == X``, i.e., we can safely + # round-trip through `syspath`. + if windows and path.startswith(WINDOWS_MAGIC_PREFIX): + path = path[len(WINDOWS_MAGIC_PREFIX):] + # Try to encode with default encodings, but fall back to UTF8. try: return path.encode(_fsencoding()) @@ -310,8 +322,8 @@ def syspath(path, pathmod=None): path = path.decode(encoding, 'replace') # Add the magic prefix if it isn't already there - if not path.startswith(u'\\\\?\\'): - path = u'\\\\?\\' + path + if not path.startswith(WINDOWS_MAGIC_PREFIX): + path = WINDOWS_MAGIC_PREFIX + path return path diff --git a/test/test_db.py b/test/test_db.py index fc597b6cd..5df34f45e 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -341,17 +341,6 @@ class DestinationTest(unittest.TestCase): ] self.assertEqual(self.lib.destination(self.i), np('one/three')) - def test_syspath_windows_format(self): - path = ntpath.join('a', 'b', 'c') - outpath = util.syspath(path, ntpath) - self.assertTrue(isinstance(outpath, unicode)) - self.assertTrue(outpath.startswith(u'\\\\?\\')) - - def test_syspath_posix_unchanged(self): - path = posixpath.join('a', 'b', 'c') - outpath = util.syspath(path, posixpath) - self.assertEqual(path, outpath) - def test_sanitize_windows_replaces_trailing_space(self): p = util.sanitize_path(u'one/two /three', ntpath) self.assertFalse(' ' in p) @@ -563,6 +552,36 @@ class DisambiguationTest(unittest.TestCase, PathFormattingMixin): self._setf(u'foo%aunique{albumartist album,albumtype}/$title') self._assert_dest('/base/foo [foo_bar]/the title', self.i1) +class PathConversionTest(unittest.TestCase): + def test_syspath_windows_format(self): + path = ntpath.join('a', 'b', 'c') + outpath = util.syspath(path, ntpath) + self.assertTrue(isinstance(outpath, unicode)) + self.assertTrue(outpath.startswith(u'\\\\?\\')) + + def test_syspath_posix_unchanged(self): + path = 
posixpath.join('a', 'b', 'c') + outpath = util.syspath(path, posixpath) + self.assertEqual(path, outpath) + + def _windows_bytestring_path(self, path): + old_gfse = sys.getfilesystemencoding + sys.getfilesystemencoding = lambda: 'mbcs' + try: + return util.bytestring_path(path, ntpath) + finally: + sys.getfilesystemencoding = old_gfse + + def test_bytestring_path_windows_encodes_utf8(self): + path = u'caf\xe9' + outpath = self._windows_bytestring_path(path) + self.assertEqual(path, outpath.decode('utf8')) + + def test_bytesting_path_windows_removes_magic_prefix(self): + path = u'\\\\?\\C:\\caf\xe9' + outpath = self._windows_bytestring_path(path) + self.assertEqual(outpath, u'C:\\caf\xe9'.encode('utf8')) + class PluginDestinationTest(unittest.TestCase): # Mock the plugins.template_values(item) function. def _template_values(self, item):