Merge pull request #4507 from beetbox/arg-encoding-experiment

Use Python's standard filesystem encoding for command-line arguments
This commit is contained in:
Adrian Sampson 2022-10-03 10:44:01 -07:00 committed by GitHub
commit 4a9e7c1d33
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 16 additions and 33 deletions

View file

@ -17,7 +17,6 @@
import os
import sys
import errno
import locale
import re
import tempfile
import shutil
@ -332,12 +331,7 @@ def arg_encoding():
"""Get the encoding for command-line arguments (and other OS
locale-sensitive strings).
"""
try:
return locale.getdefaultlocale()[1] or 'utf-8'
except ValueError:
# Invalid locale environment variable setting. To avoid
# failing entirely for no good reason, assume UTF-8.
return 'utf-8'
return sys.getfilesystemencoding()
def _fsencoding():
@ -837,13 +831,14 @@ def cpu_count():
def convert_command_args(args):
"""Convert command arguments to bytestrings on Python 2 and
surrogate-escaped strings on Python 3."""
"""Convert command arguments, which may either be `bytes` or `str`
objects, to uniformly surrogate-escaped strings.
"""
assert isinstance(args, list)
def convert(arg):
if isinstance(arg, bytes):
arg = arg.decode(arg_encoding(), 'surrogateescape')
return os.fsdecode(arg)
return arg
return [convert(a) for a in args]
@ -1092,21 +1087,3 @@ def lazy_property(func):
return value
return wrapper
def decode_commandline_path(path):
"""Prepare a path for substitution into commandline template.
On Python 3, we need to construct the subprocess commands to invoke as a
Unicode string. On Unix, this is a little unfortunate---the OS is
expecting bytes---so we use surrogate escaping and decode with the
argument encoding, which is the same encoding that will then be
*reversed* to recover the same bytes before invoking the OS. On
Windows, we want to preserve the Unicode filename "as is."
"""
# On Python 3, the template is a Unicode string, which only supports
# substitution of Unicode variables.
if platform.system() == 'Windows':
return path.decode(_fsencoding())
else:
return path.decode(arg_encoding(), 'surrogateescape')

View file

@ -14,7 +14,7 @@
"""Converts tracks or albums to external directory
"""
from beets.util import par_map, decode_commandline_path, arg_encoding
from beets.util import par_map, arg_encoding
import os
import threading
@ -214,8 +214,8 @@ class ConvertPlugin(BeetsPlugin):
self._log.info('Encoding {0}', util.displayable_path(source))
command = command.decode(arg_encoding(), 'surrogateescape')
source = decode_commandline_path(source)
dest = decode_commandline_path(dest)
source = os.fsdecode(source)
dest = os.fsdecode(dest)
# Substitute $source and $dest in the argument list.
args = shlex.split(command)

View file

@ -16,11 +16,12 @@
"""
import shlex
import os
from beets.plugins import BeetsPlugin
from beets.ui import decargs, print_, Subcommand, UserError
from beets.util import command_output, displayable_path, subprocess, \
bytestring_path, MoveOperation, decode_commandline_path
bytestring_path, MoveOperation
from beets.library import Item, Album
@ -200,7 +201,7 @@ class DuplicatesPlugin(BeetsPlugin):
output as flexattr on a key that is the name of the program, and
return the key, checksum tuple.
"""
args = [p.format(file=decode_commandline_path(item.path))
args = [p.format(file=os.fsdecode(item.path))
for p in shlex.split(prog)]
key = args[0]
checksum = getattr(item, key, False)

View file

@ -51,6 +51,10 @@ Bug fixes:
* :doc:`/plugins/replaygain`: Avoid a crash when errors occur in the analysis
backend.
:bug:`4506`
* We now use Python's defaults for command-line argument encoding, which
should reduce the chance for errors and "file not found" failures when
invoking other command-line tools, especially on Windows.
:bug:`4507`
* We now respect the Spotify API's rate limiting, which avoids crashing when the API reports code 429 (too many requests).
:bug:`4370`
* Fix implicit paths OR queries (e.g. ``beet list /path/ , /other-path/``)

View file

@ -101,6 +101,7 @@ class UtilTest(unittest.TestCase):
])
self.assertEqual(p, 'foo/_/bar')
@unittest.skipIf(sys.platform == 'win32', 'win32')
def test_convert_command_args_keeps_undecodeable_bytes(self):
arg = b'\x82' # non-ascii bytes
cmd_args = util.convert_command_args([arg])