diff --git a/NEWS b/NEWS
index 066168edb..19788abc5 100644
--- a/NEWS
+++ b/NEWS
@@ -35,6 +35,8 @@
   Windows users can now just type "beet" at the prompt to run beets.
 * Fixed an occasional bug where Mutagen would complain that a tag was
   already present.
+* Fixed some errors with filenames that have badly encoded special
+  characters.
 
 1.0b3
 -----
diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py
index 0668fb1be..0cc21fc27 100644
--- a/beets/autotag/__init__.py
+++ b/beets/autotag/__init__.py
@@ -87,7 +87,16 @@ def _sorted_walk(path):
     dirs = []
     files = []
     for base in os.listdir(path):
-        base = library._unicode_path(base)
+        # While os.listdir() will try to give us unicode output (as
+        # we gave it unicode input), it may fail to decode some
+        # filenames.
+        try:
+            base = library._unicode_path(base)
+        except UnicodeError:
+            # Log and ignore undecodeable filenames.
+            log.error(u'invalid filename in %s' % path)
+            continue
+
         cur = os.path.join(path, base)
         if os.path.isdir(cur):
             dirs.append(base)
@@ -101,7 +110,6 @@ def _sorted_walk(path):
 
     # Recurse into directories.
     for base in dirs:
-        base = library._unicode_path(base)
         cur = os.path.join(path, base)
         # yield from _sorted_walk(cur)
         for res in _sorted_walk(cur):
diff --git a/beets/library.py b/beets/library.py
index fdc18bea8..a0040cbdc 100644
--- a/beets/library.py
+++ b/beets/library.py
@@ -164,7 +164,19 @@ def _unicode_path(path):
     """Ensures that a path string is in Unicode."""
     if isinstance(path, unicode):
         return path
-    return path.decode(sys.getfilesystemencoding())
+    encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+    try:
+        out = path.decode(encoding)
+    except UnicodeError:
+        # This is of course extremely hacky, but I've received several
+        # reports of filesystems misrepresenting their encoding as
+        # UTF-8 and actually providing Latin-1 strings. This helps
+        # handle those cases. All this is the cost of dealing
+        # exclusively with Unicode pathnames internally (which
+        # simplifies their construction from metadata and storage in
+        # SQLite).
+        out = path.decode('latin1')
+    return out
 
 # Note: POSIX actually supports \ and : -- I just think they're
 # a pain. And ? has caused problems for some.