mirror of
https://github.com/beetbox/beets.git
synced 2025-12-06 00:24:25 +01:00
fix: Sanitize log messages by removing control characters
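Added a regex pattern that matches C0/C1 control characters and DEL (excluding the useful whitespace characters tab, newline and carriage return) and replaces each match with the Unicode replacement character (U+FFFD) in log messages before terminal output. This prevents disruptive or malicious control sequences from affecting terminal rendering.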
This commit is contained in:
parent
6abb901b6b
commit
67e668d81f
3 changed files with 68 additions and 0 deletions
|
|
@ -22,6 +22,7 @@ calls (`debug`, `info`, etc).
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import threading
|
||||
from copy import copy
|
||||
from logging import (
|
||||
|
|
@ -68,6 +69,15 @@ if TYPE_CHECKING:
|
|||
_ArgsType = Union[tuple[object, ...], Mapping[str, object]]
|
||||
|
||||
|
||||
# Regular expression to match:
|
||||
# - C0 control characters (0x00-0x1F) except useful whitespace (\t, \n, \r)
|
||||
# - DEL control character (0x7f)
|
||||
# - C1 control characters (0x80-0x9F)
|
||||
# Used to sanitize log messages that could disrupt terminal output
|
||||
_CONTROL_CHAR_REGEX = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f\x80-\x9f]")
|
||||
_UNICODE_REPLACEMENT_CHARACTER = "\ufffd"
|
||||
|
||||
|
||||
def _logsafe(val: T) -> str | T:
|
||||
"""Coerce `bytes` to `str` to avoid crashes solely due to logging.
|
||||
|
||||
|
|
@ -82,6 +92,10 @@ def _logsafe(val: T) -> str | T:
|
|||
# type, and (b) warn the developer if they do this for other
|
||||
# bytestrings.
|
||||
return val.decode("utf-8", "replace")
|
||||
if isinstance(val, str):
|
||||
# Sanitize log messages by replacing control characters that can disrupt
|
||||
# terminals.
|
||||
return _CONTROL_CHAR_REGEX.sub(_UNICODE_REPLACEMENT_CHARACTER, val)
|
||||
|
||||
# Other objects are used as-is so field access, etc., still works in
|
||||
# the format string. Relies on a working __str__ implementation.
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ Bug fixes:
|
|||
endpoints. Previously, due to single-quotes (ie. string literal) in the SQL
|
||||
query, the query eg. `GET /item/values/albumartist` would return the literal
|
||||
"albumartist" instead of a list of unique album artists.
|
||||
- Sanitize log messages by replacing control characters, preventing terminal
|
||||
rendering issues.
|
||||
|
||||
For plugin developers:
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,58 @@ class TestStrFormatLogger:
|
|||
assert str(caplog.records[0].msg) == expected
|
||||
|
||||
|
||||
class TestLogSanitization:
|
||||
"""Log messages should have control characters removed from:
|
||||
- String arguments
|
||||
- Keyword argument values
|
||||
- Bytes arguments (which get decoded first)
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"msg, args, kwargs, expected",
|
||||
[
|
||||
# Valid UTF-8 bytes are decoded and preserved
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
(b"oof \xc3\xa9",),
|
||||
{"bar": b"baz \xc3\xa9"},
|
||||
"foo oof é bar baz é",
|
||||
),
|
||||
# Invalid UTF-8 bytes are decoded with replacement characters
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
(b"oof \xff",),
|
||||
{"bar": b"baz \xff"},
|
||||
"foo oof \ufffd bar baz \ufffd",
|
||||
),
|
||||
# Control characters should be replaced with U+FFFD
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
("oof \x9e",),
|
||||
{"bar": "baz \x9e"},
|
||||
"foo oof \ufffd bar baz \ufffd",
|
||||
),
|
||||
# Whitespace control characters should be preserved
|
||||
(
|
||||
"foo {} bar {bar}",
|
||||
("foo\t\n",),
|
||||
{"bar": "bar\r"},
|
||||
"foo foo\t\n bar bar\r",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_sanitization(self, msg, args, kwargs, expected, caplog):
|
||||
level = log.INFO
|
||||
logger = blog.getLogger("test_logger")
|
||||
logger.setLevel(level)
|
||||
|
||||
with caplog.at_level(level, logger="test_logger"):
|
||||
logger.log(level, msg, *args, **kwargs)
|
||||
|
||||
assert caplog.records, "No log records were captured"
|
||||
assert str(caplog.records[0].msg) == expected
|
||||
|
||||
|
||||
class DummyModule(ModuleType):
|
||||
class DummyPlugin(plugins.BeetsPlugin):
|
||||
def __init__(self):
|
||||
|
|
|
|||
Loading…
Reference in a new issue