From 82d41446a22f7e55259aab469f9e7b99c50cc70f Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Sat, 1 Oct 2022 16:37:47 -0700 Subject: [PATCH 1/4] Use fsdecode for template substitution The idea in this PR is to converge on Python's `fsdecode` and `fsencode` for argument manipulation. This seems to match up with the Python standard library's assumptions: namely, on Windows, we use `fsdecode` to get back to Unicode strings: https://github.com/python/cpython/blob/54bbb5e3363ef3634f1fd7521ba3f3c42c81a331/Lib/subprocess.py#L561 So let's start by dropping this utility and going straight for `fsdecode` here to match. --- beets/util/__init__.py | 18 ------------------ beetsplug/convert.py | 6 +++--- beetsplug/duplicates.py | 5 +++-- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 720ca311a..cb9c3f4ea 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -1092,21 +1092,3 @@ def lazy_property(func): return value return wrapper - - -def decode_commandline_path(path): - """Prepare a path for substitution into commandline template. - - On Python 3, we need to construct the subprocess commands to invoke as a - Unicode string. On Unix, this is a little unfortunate---the OS is - expecting bytes---so we use surrogate escaping and decode with the - argument encoding, which is the same encoding that will then be - *reversed* to recover the same bytes before invoking the OS. On - Windows, we want to preserve the Unicode filename "as is." - """ - # On Python 3, the template is a Unicode string, which only supports - # substitution of Unicode variables. - if platform.system() == 'Windows': - return path.decode(_fsencoding()) - else: - return path.decode(arg_encoding(), 'surrogateescape') diff --git a/beetsplug/convert.py b/beetsplug/convert.py index 95240dc39..17a18e358 100644 --- a/beetsplug/convert.py +++ b/beetsplug/convert.py @@ -14,7 +14,7 @@ """Converts tracks or albums to external directory """ -from beets.util import par_map, decode_commandline_path, arg_encoding +from beets.util import par_map, arg_encoding import os import threading @@ -214,8 +214,8 @@ class ConvertPlugin(BeetsPlugin): self._log.info('Encoding {0}', util.displayable_path(source)) command = command.decode(arg_encoding(), 'surrogateescape') - source = decode_commandline_path(source) - dest = decode_commandline_path(dest) + source = os.fsdecode(source) + dest = os.fsdecode(dest) # Substitute $source and $dest in the argument list. args = shlex.split(command) diff --git a/beetsplug/duplicates.py b/beetsplug/duplicates.py index fdd5c1750..f655c61f2 100644 --- a/beetsplug/duplicates.py +++ b/beetsplug/duplicates.py @@ -16,11 +16,12 @@ """ import shlex +import os from beets.plugins import BeetsPlugin from beets.ui import decargs, print_, Subcommand, UserError from beets.util import command_output, displayable_path, subprocess, \ - bytestring_path, MoveOperation, decode_commandline_path + bytestring_path, MoveOperation from beets.library import Item, Album @@ -200,7 +201,7 @@ class DuplicatesPlugin(BeetsPlugin): output as flexattr on a key that is the name of the program, and return the key, checksum tuple. """ - args = [p.format(file=decode_commandline_path(item.path)) + args = [p.format(file=os.fsdecode(item.path)) for p in shlex.split(prog)] key = args[0] checksum = getattr(item, key, False) From 086bab55b1b04eb091fad8194fd641df76bff47a Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Sat, 1 Oct 2022 16:42:43 -0700 Subject: [PATCH 2/4] Standardize on Python's fsencode for arguments This can apparently never be `None`, as of Python 3.2. --- beets/util/__init__.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index cb9c3f4ea..06e02ee08 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -17,7 +17,6 @@ import os import sys import errno -import locale import re import tempfile import shutil @@ -332,12 +331,7 @@ def arg_encoding(): """Get the encoding for command-line arguments (and other OS locale-sensitive strings). """ - try: - return locale.getdefaultlocale()[1] or 'utf-8' - except ValueError: - # Invalid locale environment variable setting. To avoid - # failing entirely for no good reason, assume UTF-8. - return 'utf-8' + return sys.getfilesystemencoding() def _fsencoding(): @@ -837,13 +831,14 @@ def cpu_count(): def convert_command_args(args): - """Convert command arguments to bytestrings on Python 2 and - surrogate-escaped strings on Python 3.""" + """Convert command arguments, which may either be `bytes` or `str` + objects, to uniformly surrogate-escaped strings. + """ assert isinstance(args, list) def convert(arg): if isinstance(arg, bytes): - arg = arg.decode(arg_encoding(), 'surrogateescape') + return os.fsdecode(arg) return arg return [convert(a) for a in args] From 8baf3e302d7b2b9a4dfc0806cc112bb0ddb768cd Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Sat, 1 Oct 2022 17:13:57 -0700 Subject: [PATCH 3/4] Skip an unhelpful test on Windows On Windows, converting command-line arguments (hopefully!!!) only needs to deal with valid strings from the OS. So it is not really relevant to test with non-UTF-8, non-surrogate bytes. --- test/test_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_util.py b/test/test_util.py index fcaf9f5ce..14ac7f2b2 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -101,6 +101,7 @@ class UtilTest(unittest.TestCase): ]) self.assertEqual(p, 'foo/_/bar') + @unittest.skipIf(sys.platform == 'win32', 'win32') def test_convert_command_args_keeps_undecodeable_bytes(self): arg = b'\x82' # non-ascii bytes cmd_args = util.convert_command_args([arg]) From 1a73a4550ae721391a55755ea9edf58f51c76de6 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Sat, 1 Oct 2022 17:30:53 -0700 Subject: [PATCH 4/4] Changelog for #4507 --- docs/changelog.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9df77d5e7..8ea1c0322 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -51,6 +51,10 @@ Bug fixes: * :doc:`/plugins/replaygain`: Avoid a crash when errors occur in the analysis backend. :bug:`4506` +* We now use Python's defaults for command-line argument encoding, which + should reduce the chance for errors and "file not found" failures when + invoking other command-line tools, especially on Windows. + :bug:`4507` * We now respect the Spotify API's rate limiting, which avoids crashing when the API reports code 429 (too many requests). :bug:`4370` * Fix implicit paths OR queries (e.g. ``beet list /path/ , /other-path/``)