fix: Sanitize log messages by removing control characters

Added regex pattern to strip C0/C1 control characters (excluding useful whitespace) from log messages before terminal output. This prevents disruptive/malicious control sequences from affecting terminal rendering.
2026-03-06 21:22:09 +01:00 · 2025-11-24 23:43:33 +05:00 · 2025-11-24 23:43:33 +05:00 · 67e668d81f
commit 67e668d81f
parent 6abb901b6b
3 changed files with 68 additions and 0 deletions
--- a/beets/logging.py
+++ b/beets/logging.py
@ -22,6 +22,7 @@ calls (`debug`, `info`, etc).

 from __future__ import annotations

+import re
 import threading
 from copy import copy
 from logging import (
@ -68,6 +69,15 @@ if TYPE_CHECKING:
    _ArgsType = Union[tuple[object, ...], Mapping[str, object]]


+# Regular expression to match:
+# - C0 control characters (0x00-0x1F) except useful whitespace (\t, \n, \r)
+# - DEL control character (0x7f)
+# - C1 control characters (0x80-0x9F)
+# Used to sanitize log messages that could disrupt terminal output
+_CONTROL_CHAR_REGEX = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f\x80-\x9f]")
+_UNICODE_REPLACEMENT_CHARACTER = "\ufffd"
+
+
 def _logsafe(val: T) -> str | T:
    """Coerce `bytes` to `str` to avoid crashes solely due to logging.

@ -82,6 +92,10 @@ def _logsafe(val: T) -> str | T:
        # type, and (b) warn the developer if they do this for other
        # bytestrings.
        return val.decode("utf-8", "replace")
+    if isinstance(val, str):
+        # Sanitize log messages by replacing control characters that can disrupt
+        # terminals.
+        return _CONTROL_CHAR_REGEX.sub(_UNICODE_REPLACEMENT_CHARACTER, val)

    # Other objects are used as-is so field access, etc., still works in
    # the format string. Relies on a working __str__ implementation.
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -54,6 +54,8 @@ Bug fixes:
  endpoints. Previously, due to single-quotes (ie. string literal) in the SQL
  query, the query eg. `GET /item/values/albumartist` would return the literal
  "albumartist" instead of a list of unique album artists.
+- Sanitize log messages by removing control characters preventing terminal
+  rendering issues.

 For plugin developers:

--- a/test/test_logging.py
+++ b/test/test_logging.py
@ -67,6 +67,58 @@ class TestStrFormatLogger:
        assert str(caplog.records[0].msg) == expected


+class TestLogSanitization:
+    """Log messages should have control characters removed from:
+    - String arguments
+    - Keyword argument values
+    - Bytes arguments (which get decoded first)
+    """
+
+    @pytest.mark.parametrize(
+        "msg, args, kwargs, expected",
+        [
+            # Valid UTF-8 bytes are decoded and preserved
+            (
+                "foo {} bar {bar}",
+                (b"oof \xc3\xa9",),
+                {"bar": b"baz \xc3\xa9"},
+                "foo oof é bar baz é",
+            ),
+            # Invalid UTF-8 bytes are decoded with replacement characters
+            (
+                "foo {} bar {bar}",
+                (b"oof \xff",),
+                {"bar": b"baz \xff"},
+                "foo oof <20> bar baz <20>",
+            ),
+            # Control characters should be removed
+            (
+                "foo {} bar {bar}",
+                ("oof \x9e",),
+                {"bar": "baz \x9e"},
+                "foo oof <20> bar baz <20>",
+            ),
+            # Whitespace control characters should be preserved
+            (
+                "foo {} bar {bar}",
+                ("foo\t\n",),
+                {"bar": "bar\r"},
+                "foo foo\t\n bar bar\r",
+            ),
+        ],
+    )
+    def test_sanitization(self, msg, args, kwargs, expected, caplog):
+        level = log.INFO
+        logger = blog.getLogger("test_logger")
+        logger.setLevel(level)
+
+        with caplog.at_level(level, logger="test_logger"):
+            logger.log(level, msg, *args, **kwargs)
+
+        assert caplog.records, "No log records were captured"
+        assert str(caplog.records[0].msg) == expected
+
+
 class DummyModule(ModuleType):
    class DummyPlugin(plugins.BeetsPlugin):
        def __init__(self):