mirror of
https://github.com/beetbox/beets.git
synced 2025-12-06 08:39:17 +01:00
Sanitize log messages by removing control characters (#6199)
This pull request addresses an issue where control characters in log messages could halt beets execution entirely. The fix implements sanitization of log messages by removing C0 and C1 control characters before they reach the terminal.
This commit is contained in:
commit
ca7e959f5b
3 changed files with 68 additions and 0 deletions
|
|
@ -22,6 +22,7 @@ calls (`debug`, `info`, etc).
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import threading
|
||||
from copy import copy
|
||||
from logging import (
|
||||
|
|
@ -68,6 +69,15 @@ if TYPE_CHECKING:
|
|||
_ArgsType = Union[tuple[object, ...], Mapping[str, object]]
|
||||
|
||||
|
||||
# Regular expression to match:
|
||||
# - C0 control characters (0x00-0x1F) except useful whitespace (\t, \n, \r)
|
||||
# - DEL control character (0x7f)
|
||||
# - C1 control characters (0x80-0x9F)
|
||||
# Used to sanitize log messages that could disrupt terminal output
|
||||
_CONTROL_CHAR_REGEX = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f\x80-\x9f]")
|
||||
_UNICODE_REPLACEMENT_CHARACTER = "\ufffd"
|
||||
|
||||
|
||||
def _logsafe(val: T) -> str | T:
|
||||
"""Coerce `bytes` to `str` to avoid crashes solely due to logging.
|
||||
|
||||
|
|
@ -82,6 +92,10 @@ def _logsafe(val: T) -> str | T:
|
|||
# type, and (b) warn the developer if they do this for other
|
||||
# bytestrings.
|
||||
return val.decode("utf-8", "replace")
|
||||
if isinstance(val, str):
|
||||
# Sanitize log messages by replacing control characters that can disrupt
|
||||
# terminals.
|
||||
return _CONTROL_CHAR_REGEX.sub(_UNICODE_REPLACEMENT_CHARACTER, val)
|
||||
|
||||
# Other objects are used as-is so field access, etc., still works in
|
||||
# the format string. Relies on a working __str__ implementation.
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ Bug fixes:
|
|||
endpoints. Previously, due to single-quotes (ie. string literal) in the SQL
|
||||
query, the query eg. `GET /item/values/albumartist` would return the literal
|
||||
"albumartist" instead of a list of unique album artists.
|
||||
- Sanitize log messages by removing control characters preventing terminal
|
||||
rendering issues.
|
||||
|
||||
For plugin developers:
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,58 @@ class TestStrFormatLogger:
|
|||
assert str(caplog.records[0].msg) == expected
|
||||
|
||||
|
||||
class TestLogSanitization:
|
||||
"""Log messages should have control characters removed from:
|
||||
- String arguments
|
||||
- Keyword argument values
|
||||
- Bytes arguments (which get decoded first)
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"msg, args, kwargs, expected",
|
||||
[
|
||||
# Valid UTF-8 bytes are decoded and preserved
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
(b"oof \xc3\xa9",),
|
||||
{"bar": b"baz \xc3\xa9"},
|
||||
"foo oof é bar baz é",
|
||||
),
|
||||
# Invalid UTF-8 bytes are decoded with replacement characters
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
(b"oof \xff",),
|
||||
{"bar": b"baz \xff"},
|
||||
"foo oof <20> bar baz <20>",
|
||||
),
|
||||
# Control characters should be removed
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
("oof \x9e",),
|
||||
{"bar": "baz \x9e"},
|
||||
"foo oof <20> bar baz <20>",
|
||||
),
|
||||
# Whitespace control characters should be preserved
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
("foo\t\n",),
|
||||
{"bar": "bar\r"},
|
||||
"foo foo\t\n bar bar\r",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_sanitization(self, msg, args, kwargs, expected, caplog):
|
||||
level = log.INFO
|
||||
logger = blog.getLogger("test_logger")
|
||||
logger.setLevel(level)
|
||||
|
||||
with caplog.at_level(level, logger="test_logger"):
|
||||
logger.log(level, msg, *args, **kwargs)
|
||||
|
||||
assert caplog.records, "No log records were captured"
|
||||
assert str(caplog.records[0].msg) == expected
|
||||
|
||||
|
||||
class DummyModule(ModuleType):
|
||||
class DummyPlugin(plugins.BeetsPlugin):
|
||||
def __init__(self):
|
||||
|
|
|
|||
Loading…
Reference in a new issue