Rewrite tests, add cached_property decorators, add seperator feature

This commit is contained in:
Henry 2025-11-22 00:16:33 -08:00
parent df1ef40790
commit 83c16cbb5d
4 changed files with 432 additions and 327 deletions

View file

@ -17,7 +17,8 @@ Title case logic is derived from the python-titlecase library.
Provides a template function and a tag modification function."""
import re
from typing import Optional
from functools import cached_property
from typing import TypedDict
from titlecase import titlecase
@ -31,26 +32,22 @@ __author__ = "henryoberholtzer@gmail.com"
__version__ = "1.0"
class TitlecasePlugin(BeetsPlugin):
preserve: dict[str, str] = {}
preserve_phrases: dict[str, re.Pattern[str]] = {}
force_lowercase: bool = True
fields_to_process: set[str] = set()
the_artist: bool = True
the_artist_regexp = re.compile(r"\bthe\b")
# Typed container for the case-preservation tables built from the
# ``preserve`` config option.
class PreservedText(TypedDict):
# Single words: upper-cased form -> exact spelling to keep.
words: dict[str, str]
# Phrases (entries containing a space): exact spelling -> compiled
# pattern used to find the phrase.
phrases: dict[str, re.Pattern[str]]
class TitlecasePlugin(BeetsPlugin):
def __init__(self) -> None:
super().__init__()
# Register template function
self.template_funcs["titlecase"] = self.titlecase # type: ignore
self.config.add(
{
"auto": True,
"preserve": [],
"fields": [],
"replace": [],
"seperators": [],
"force_lowercase": False,
"small_first_last": True,
"the_artist": True,
@ -63,6 +60,7 @@ class TitlecasePlugin(BeetsPlugin):
preserve - Provide a list of strings with specific case requirements.
fields - Fields to apply titlecase to.
replace - List of pairs, first is the target, second is the replacement
seperators - Other characters to treat like periods.
force_lowercase - Lowercases the string before titlecasing.
small_first_last - If small characters should be cased at the start of strings.
the_artist - If the plugin infers the field to be an artist field
@ -71,6 +69,8 @@ class TitlecasePlugin(BeetsPlugin):
that start with 'The', like 'The Who' or 'The Talking Heads' when
they are not at the start of a string. Superseded by preserved phrases.
"""
# Register template function
self.template_funcs["titlecase"] = self.titlecase # type: ignore
# Register UI subcommands
self._command = ui.Subcommand(
@ -78,8 +78,7 @@ class TitlecasePlugin(BeetsPlugin):
help="Apply titlecasing to metadata specified in config.",
)
self._get_config()
if self.config["auto"]:
if self.config["auto"].get(bool):
if self.config["after_choice"].get(bool):
self.import_stages = [self.imported]
else:
@ -90,37 +89,56 @@ class TitlecasePlugin(BeetsPlugin):
"albuminfo_received", self.received_info_handler
)
def _get_config(self):
self.force_lowercase = self.config["force_lowercase"].get(bool)
self.replace = self.config["replace"].as_pairs()
self.the_artist = self.config["the_artist"].get(bool)
self._preserve_words(self.config["preserve"].as_str_seq())
self._initialize_fields(
self.config["fields"].as_str_seq(),
)
# ``force_lowercase`` option as a bool; when true, text is lowercased
# before titlecasing. Read from config once, then cached on the instance.
@cached_property
def force_lowercase(self) -> bool:
return self.config["force_lowercase"].get(bool)
def _initialize_fields(self, fields: list[str]) -> None:
"""Creates the set for fields to process in tagging."""
if fields:
self.fields_to_process = set(fields)
self._log.debug(
f"set fields to process: {', '.join(self.fields_to_process)}"
)
else:
self._log.debug("no fields specified!")
# ``replace`` option as (target, replacement) pairs.
# Read from config once, then cached on the instance.
@cached_property
def replace(self) -> list[tuple[str, str]]:
return self.config["replace"].as_pairs()
def _preserve_words(self, preserve: list[str]) -> None:
for word in preserve:
if " " in word:
self.preserve_phrases[word] = re.compile(
rf"\b{re.escape(word)}\b", re.IGNORECASE
# ``the_artist`` option as a bool; gates the capitalisation of "the"
# for fields whose name contains "artist". Cached after first access.
@cached_property
def the_artist(self) -> bool:
return self.config["the_artist"].get(bool)
# Set of field names taken from the ``fields`` option. The set is logged
# at debug level the first time it is built, then cached on the instance.
@cached_property
def fields_to_process(self) -> set[str]:
fields = set(self.config["fields"].as_str_seq())
self._log.debug(f"fields: {', '.join(fields)}")
return fields
@cached_property
def preserve(self) -> PreservedText:
strings = self.config["preserve"].as_str_seq()
preserved: PreservedText = {"words": {}, "phrases": {}}
for s in strings:
if " " in s:
preserved["phrases"][s] = re.compile(
rf"\b{re.escape(s)}\b", re.IGNORECASE
)
else:
self.preserve[word.upper()] = word
preserved["words"][s.upper()] = s
return preserved
def _preserved(self, word, **kwargs) -> Optional[str]:
# Compiled splitter built from the ``seperators`` option, or None when the
# option yields an empty string. Cached after first access.
@cached_property
def seperators(self) -> re.Pattern[str] | None:
# dict.fromkeys drops duplicate entries while keeping their order; the
# joined result becomes the body of a regex character class.
if seperators := "".join(
dict.fromkeys(self.config["seperators"].as_str_seq())
):
# Group 1: shortest text ending in a run of seperator characters.
# Group 2: whitespace after that run.
# The lookahead requires at least one more character to follow.
return re.compile(rf"(.*?[{re.escape(seperators)}]+)(\s*)(?=.)")
return None
# ``small_first_last`` option as a bool; forwarded to the titlecase
# library call under the same keyword. Cached after first access.
@cached_property
def small_first_last(self) -> bool:
return self.config["small_first_last"].get(bool)
# Case-sensitive pattern for the standalone lowercase word "the"
# (word boundaries on both sides). Compiled once, then cached.
@cached_property
def the_artist_regexp(self) -> re.Pattern[str]:
return re.compile(r"\bthe\b")
def titlecase_callback(self, word, **kwargs) -> str | None:
"""Callback function for words to preserve case of."""
if preserved_word := self.preserve.get(word.upper(), ""):
if preserved_word := self.preserve["words"].get(word.upper(), ""):
return preserved_word
return None
@ -146,7 +164,7 @@ class TitlecasePlugin(BeetsPlugin):
self._command.func = func
return [self._command]
def titlecase_fields(self, item: Item | Info):
def titlecase_fields(self, item: Item | Info) -> None:
"""Applies titlecase to fields, except
those excluded by the default exclusions and the
set exclude lists.
@ -178,6 +196,17 @@ class TitlecasePlugin(BeetsPlugin):
def titlecase(self, text: str, field: str = "") -> str:
"""Titlecase the given text."""
# Check we should split this into two substrings.
if self.seperators:
if len(splits := self.seperators.findall(text)):
print(splits)
split_cased = "".join(
[self.titlecase(s[0], field) + s[1] for s in splits]
)
# Add on the remaining portion
return split_cased + self.titlecase(
text[len(split_cased) :], field
)
# Any necessary replacements go first, mainly punctuation.
titlecased = text.lower() if self.force_lowercase else text
for pair in self.replace:
@ -186,14 +215,14 @@ class TitlecasePlugin(BeetsPlugin):
# General titlecase operation
titlecased = titlecase(
titlecased,
small_first_last=self.config["small_first_last"],
callback=self._preserved,
small_first_last=self.small_first_last,
callback=self.titlecase_callback,
)
# Apply "The Artist" feature
if self.the_artist and "artist" in field:
titlecased = self.the_artist_regexp.sub("The", titlecased)
# More complicated phrase replacements.
for phrase, regexp in self.preserve_phrases.items():
for phrase, regexp in self.preserve["phrases"].items():
titlecased = regexp.sub(phrase, titlecased)
return titlecased

View file

@ -26,6 +26,8 @@ New features:
- :doc:`plugins/mbpseudo`: Add a new `mbpseudo` plugin to proactively receive
MusicBrainz pseudo-releases as recommendations during import.
- Added support for Python 3.13.
- :doc:`plugins/titlecase`: Add the `titlecase` plugin to allow users to
resolve differences in metadata source styles.
Bug fixes:

View file

@ -54,9 +54,10 @@ Default
titlecase:
auto: yes
fields:
preserve:
replace:
fields: []
preserve: []
replace: []
seperators: []
force_lowercase: no
small_first_last: yes
the_artist: yes
@ -68,41 +69,62 @@ Default
Whether to automatically apply titlecase to new imports.
.. conf:: fields
:default: []
A list of fields to apply the titlecase logic to. You must specify the fields
you want to have modified in order for titlecase to apply changes to metadata.
A list of fields to apply the titlecase logic to. You must specify the fields
you want to have modified in order for titlecase to apply changes to metadata.
A good starting point is below, which will titlecase artists, album and track titles.
A good starting point is below, which will titlecase album titles, track titles, and all artist fields.
.. code-block:: yaml
fields:
- album
- albumartist
- albumartist_credit
- albumartist_sort
- albumartists
- albumartists_credit
- albumartists_sort
- artist
- artist_credit
- artist_sort
- artists
- artists_credit
- artists_sort
- title
titlecase:
fields:
- album
- title
- albumartist
- albumartist_credit
- albumartist_sort
- albumartists
- albumartists_credit
- albumartists_sort
- artist
- artist_credit
- artist_sort
- artists
- artists_credit
- artists_sort
.. conf:: preserve
:default: []
List of words and phrases to preserve the case of. Without specifying ``DJ`` on
the list, titlecase will format it as ``Dj``, or specify ``The Beatles`` to make sure
``With The Beatles`` is not capitalized as ``With the Beatles``.
List of words and phrases to preserve the case of. Without specifying ``DJ`` on
the list, titlecase will format it as ``Dj``, or specify ``The Beatles`` to make sure
``With The Beatles`` is not capitalized as ``With the Beatles``.
.. conf:: replace
:default: []
The replace function takes place before any titlecasing occurs, and is intended to
help normalize differences in punctuation styles. It accepts a list of tuples, with
the first being the target, and the second being the replacement
The replace function takes place before any titlecasing occurs, and is intended to
help normalize differences in punctuation styles. It accepts a list of tuples, with
the first being the target, and the second being the replacement.
An example configuration that enforces one style of quotation mark is below.
.. code-block:: yaml
titlecase:
replace:
- "’": "'"
- "‘": "'"
- "“": '"'
- "”": '"'
.. conf:: seperators
:default: []
A list of characters to treat as markers of new sentences. Helpful for split titles
that might otherwise have a lowercase letter at the start of the second string.
.. conf:: force_lowercase
:default: no
@ -111,23 +133,26 @@ Default
problems with all caps acronyms titlecase would otherwise recognize.
.. conf:: small_first_last
:default: yes
An option from the base titlecase library. Controls capitalizing small words at the start
of a sentence. With this turned off ``a`` and similar words will not be capitalized
under any circumstance.
An option from the base titlecase library. Controls capitalizing small words at the start
of a sentence. With this turned off ``a`` and similar words will not be capitalized
under any circumstance.
.. conf:: the_artist
:default: yes
If a field name contains ``artist``, then any lowercase ``the`` will be
capitalized. Useful for bands with ``The`` as part of the proper name,
like ``Amyl and The Sniffers``.
If a field name contains ``artist``, then any lowercase ``the`` will be
capitalized. Useful for bands with ``The`` as part of the proper name,
like ``Amyl and The Sniffers``.
.. conf:: after_choice
:default: no
By default, titlecase runs on the candidates that are received, adjusting them before
you make your selection and creating different weight calculations. If you'd rather
see the data as received from the database, set this to true to run after you make
your tag choice.
By default, titlecase runs on the candidates that are received, adjusting them before
you make your selection and creating different weight calculations. If you'd rather
see the data as received from the database, set this to true to run after you make
your tag choice.
Dangerous Fields
~~~~~~~~~~~~~~~~

View file

@ -14,96 +14,12 @@
"""Tests for the 'titlecase' plugin"""
import pytest
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.library import Item
from beets.test.helper import PluginMixin
from beets.test.helper import PluginTestCase
from beetsplug.titlecase import TitlecasePlugin
@pytest.mark.parametrize(
"given, expected",
[
("a", "A"),
("PENDULUM", "Pendulum"),
("Aaron-carl", "Aaron-Carl"),
("LTJ bukem", "LTJ Bukem"),
(
"Freaky chakra Vs. Single Cell orchestra",
"Freaky Chakra vs. Single Cell Orchestra",
),
("(original mix)", "(Original Mix)"),
("ALL CAPS TITLE", "All Caps Title"),
],
)
def test_basic_titlecase(given, expected):
"""Assert that general behavior is as expected."""
assert TitlecasePlugin().titlecase(given) == expected
to_preserve = [
"easyFun",
"A.D.O.R",
"D'Angelo",
"ABBA",
"LaTeX",
"O.R.B",
"PinkPantheress",
]
@pytest.mark.parametrize("name", to_preserve)
def test_preserved_words(name):
"""Test using given strings to preserve case"""
t = TitlecasePlugin()
t._preserve_words(to_preserve)
assert t.titlecase(name.lower()) == name
assert t.titlecase(name.upper()) == name
def phrases_with_preserved_strings(phrases: list[str]) -> list[tuple[str, str]]:
def template(x):
return f"Example Phrase: Or {x} in Context!"
return [(template(p.lower()), template(p)) for p in phrases]
@pytest.mark.parametrize(
"given, expected", phrases_with_preserved_strings(to_preserve)
)
def test_preserved_phrases(given, expected):
t = TitlecasePlugin()
t._preserve_words(to_preserve)
assert t.titlecase(given.lower()) == expected
item_test_cases = [
(
{
"preserve": ["D'Angelo"],
"replace": [("", "'")],
"fields": ["artist", "albumartist", "mb_albumid"],
"force_lowercase": False,
"small_first_last": True,
},
Item(
artist="dangelo and the vanguard",
mb_albumid="ab140e13-7b36-402a-a528-b69e3dee38a8",
albumartist="dangelo",
format="CD",
album="the black messiah",
title="Till It's Done (Tutu)",
),
Item(
artist="D'Angelo and The Vanguard",
mb_albumid="Ab140e13-7b36-402a-A528-B69e3dee38a8",
albumartist="D'Angelo",
format="CD",
album="the black messiah",
title="Till It's Done (Tutu)",
),
),
titlecase_fields_testcases = [
(
{
"fields": [
@ -115,7 +31,6 @@ item_test_cases = [
"year",
],
"force_lowercase": True,
"small_first_last": True,
},
Item(
artist="OPHIDIAN",
@ -134,179 +49,291 @@ item_test_cases = [
title="Khameleon",
),
),
(
{
"the_artist": True,
"preserve": ["PANTHER"],
"fields": ["artist", "artists", "discogs_artistid"],
"force_lowercase": False,
"small_first_last": True,
},
Item(
artist="pinkpantheress",
artists=["pinkpantheress", "artist_two"],
artists_ids=["aBcDeF32", "aBcDeF12"],
discogs_artistid=21,
),
Item(
artist="Pinkpantheress",
artists=["Pinkpantheress", "Artist_Two"],
artists_ids=["aBcDeF32", "aBcDeF12"],
discogs_artistid=21,
),
),
(
{
"the_artist": True,
"preserve": ["A Day in the Park"],
"fields": [
"artists",
"artist",
"artists_sorttitle",
"artists_ids",
],
},
Item(
artists_sort=["b-52s, the"],
artist="a day in the park",
artists=[
"vinylgroover & the red head",
"a day in the park",
"amyl and the sniffers",
],
artists_ids=["aBcDeF32", "aBcDeF12"],
),
Item(
artists_sort=["B-52s, The"],
artist="A Day in the Park",
artists=[
"Vinylgroover & The Red Head",
"A Day in The Park",
"Amyl and The Sniffers",
],
artists_ids=["ABcDeF32", "ABcDeF12"],
),
),
(
{
"the_artist": False,
"preserve": ["A Day in the Park"],
"fields": [
"artists",
"artist",
"artists_sorttitle",
"artists_ids",
],
},
Item(
artists_sort=["b-52s, the"],
artist="a day in the park",
artists=[
"vinylgroover & the red head",
"a day in the park",
"amyl and the sniffers",
],
artists_ids=["aBcDeF32", "aBcDeF12"],
),
Item(
artists_sort=["B-52s, The"],
artist="A Day in the Park",
artists=[
"Vinylgroover & the Red Head",
"A Day in the Park",
"Amyl and the Sniffers",
],
artists_ids=["ABcDeF32", "ABcDeF12"],
),
),
]
info_test_cases = [
(
TrackInfo(
album="test album",
artist_credit="test artist credit",
artists=["artist one", "artist two"],
),
TrackInfo(
album="Test Album",
artist_credit="Test Artist Credit",
artists=["Artist One", "Artist Two"],
),
),
(
AlbumInfo(
tracks=[
class TestTitlecasePlugin(PluginTestCase):
plugin = "titlecase"
preload_plugin = False
def test_basic_titlecase(self):
"""Check that default behavior is as expected."""
# Each case is (input text, expected titlecased text).
testcases = [
("a", "A"),
("PENDULUM", "Pendulum"),
("Aaron-carl", "Aaron-Carl"),
("LTJ bukem", "LTJ Bukem"),
("(original mix)", "(Original Mix)"),
("ALL CAPS TITLE", "All Caps Title"),
]
for testcase in testcases:
given, expected = testcase
# No configure_plugin() here: the plugin runs with whatever config
# is already in effect for the test case.
assert TitlecasePlugin().titlecase(given) == expected
def test_small_first_last(self):
"""Check the behavior for supporting small first last"""
# Each case is (small_first_last setting, input text, expected text).
testcases = [
(True, "In a Silent Way", "In a Silent Way"),
(False, "In a Silent Way", "in a Silent Way"),
]
for testcase in testcases:
sfl, given, expected = testcase
cfg = {"small_first_last": sfl}
# A fresh plugin instance is created inside each configuration.
with self.configure_plugin(cfg):
assert TitlecasePlugin().titlecase(given) == expected
def test_preserve(self):
"""Test using given strings to preserve case"""
# Mix of single words and multi-word entries (the ones with spaces).
preserve_list = [
"easyFun",
"A.D.O.R",
"D'Angelo",
"ABBA",
"LaTeX",
"O.R.B",
"PinkPantheress",
"THE PSYCHIC ED RUSH",
"LTJ Bukem",
]
for word in preserve_list:
with self.configure_plugin({"preserve": preserve_list}):
# Both the all-upper and all-lower spelling must come back as the
# exact configured spelling.
assert TitlecasePlugin().titlecase(word.upper()) == word
assert TitlecasePlugin().titlecase(word.lower()) == word
def test_seperators(self):
# Each case is (configured seperators, input text, expected text).
testcases = [
# Nothing configured.
([], "it / a / in / of / to / the", "It / a / in / of / to / The"),
(["/"], "it / the test", "It / The Test"),
(
["/"],
"it / a / in / of / to / the",
"It / A / In / Of / To / The",
),
# Leading, doubled seperators and no surrounding whitespace.
(["/"], "//it/a/in/of/to/the", "//It/A/In/Of/To/The"),
# Several different seperator characters at once.
(
["/", ";", "|"],
"it ; a / in | of / to | the",
"It ; A / In | Of / To | The",
),
]
for testcase in testcases:
seperators, given, expected = testcase
with self.configure_plugin({"seperators": seperators}):
assert TitlecasePlugin().titlecase(given) == expected
def test_received_info_handler(self):
testcases = [
(
TrackInfo(
album="test album",
artist_credit="test artist credit",
artists=["artist one", "artist two"],
)
],
album="test album",
artist_credit="test artist credit",
artists=["artist one", "artist two"],
),
AlbumInfo(
tracks=[
),
TrackInfo(
album="Test Album",
artist_credit="Test Artist Credit",
artists=["Artist One", "Artist Two"],
)
],
album="Test Album",
artist_credit="Test Artist Credit",
artists=["Artist One", "Artist Two"],
),
),
]
),
),
(
AlbumInfo(
tracks=[
TrackInfo(
album="test album",
artist_credit="test artist credit",
artists=["artist one", "artist two"],
)
],
album="test album",
artist_credit="test artist credit",
artists=["artist one", "artist two"],
),
AlbumInfo(
tracks=[
TrackInfo(
album="Test Album",
artist_credit="Test Artist Credit",
artists=["Artist One", "Artist Two"],
)
],
album="Test Album",
artist_credit="Test Artist Credit",
artists=["Artist One", "Artist Two"],
),
),
]
cfg = {"fields": ["album", "artist_credit", "artists"]}
for testcase in testcases:
given, expected = testcase
with self.configure_plugin(cfg):
TitlecasePlugin().received_info_handler(given)
assert given == expected
def test_titlecase_fields(self):
testcases = [
# Test with preserve, replace, and mb_albumid
# Test with the_artist
(
{
"preserve": ["D'Angelo"],
"replace": [("", "'")],
"fields": ["artist", "albumartist", "mb_albumid"],
},
Item(
artist="dangelo and the vanguard",
mb_albumid="ab140e13-7b36-402a-a528-b69e3dee38a8",
albumartist="dangelo",
format="CD",
album="the black messiah",
title="Till It's Done (Tutu)",
),
Item(
artist="D'Angelo and The Vanguard",
mb_albumid="Ab140e13-7b36-402a-A528-B69e3dee38a8",
albumartist="D'Angelo",
format="CD",
album="the black messiah",
title="Till It's Done (Tutu)",
),
),
# Test with force_lowercase, preserve, and an incorrect field
(
{
"force_lowercase": True,
"fields": [
"artist",
"albumartist",
"format",
"title",
"year",
"label",
"format",
"INCORRECT_FIELD",
],
"preserve": ["CD"],
},
Item(
artist="OPHIDIAN",
albumartist="OphiDIAN",
format="cd",
year=2003,
album="BLACKBOX",
title="KhAmElEoN",
label="enzyme records",
),
Item(
artist="Ophidian",
albumartist="Ophidian",
format="CD",
year=2003,
album="Blackbox",
title="Khameleon",
label="Enzyme Records",
),
),
# Test with no changes
(
{
"fields": [
"artist",
"artists",
"albumartist",
"format",
"title",
"year",
"label",
"format",
"INCORRECT_FIELD",
],
"preserve": ["CD"],
},
Item(
artist="Ophidian",
artists=["Ophidian"],
albumartist="Ophidian",
format="CD",
year=2003,
album="Blackbox",
title="Khameleon",
label="Enzyme Records",
),
Item(
artist="Ophidian",
artists=["Ophidian"],
albumartist="Ophidian",
format="CD",
year=2003,
album="Blackbox",
title="Khameleon",
label="Enzyme Records",
),
),
# Test with the_artist disabled
(
{
"the_artist": False,
"fields": [
"artist",
"artists_sort",
],
},
Item(
artists_sort=["b-52s, the"],
artist="a day in the park",
),
Item(
artists_sort=["B-52s, The"],
artist="A Day in the Park",
),
),
# Test to make sure preserve and the_artist
# dont target the middle of sentences
# show that The artist applies to any field
# with artist mentioned
(
{
"preserve": ["PANTHER"],
"fields": ["artist", "artists", "artists_ids"],
},
Item(
artist="pinkpantheress",
artists=["pinkpantheress", "artist_two"],
artists_ids=["the the", "the the"],
),
Item(
artist="Pinkpantheress",
artists=["Pinkpantheress", "Artist_two"],
artists_ids=["The The", "The The"],
),
),
]
for testcase in testcases:
cfg, given, expected = testcase
with self.configure_plugin(cfg):
TitlecasePlugin().titlecase_fields(given)
assert given.artist == expected.artist
assert given.artists == expected.artists
assert given.artists_sort == expected.artists_sort
assert given.albumartist == expected.albumartist
assert given.artists_ids == expected.artists_ids
assert given.format == expected.format
assert given.year == expected.year
assert given.title == expected.title
assert given.label == expected.label
class TitlecasePluginMethodTests(PluginMixin):
plugin = "titlecase"
preload_plugin = False
def test_small_first_last(self):
with self.configure_plugin({"small_first_last": False}):
assert (
TitlecasePlugin().titlecase("A Simple Trial")
== "a Simple Trial"
)
with self.configure_plugin({"small_first_last": True}):
assert (
TitlecasePlugin().titlecase("A simple Trial")
== "A Simple Trial"
)
def test_field_list(self):
fields = ["album", "albumartist"]
with self.configure_plugin({"fields": fields}):
t = TitlecasePlugin()
for field in fields:
assert field in t.fields_to_process
@pytest.mark.parametrize("given, expected", info_test_cases)
def test_received_info_handler(self, given, expected):
with self.configure_plugin(
{"fields": ["album", "artist_credit", "artists"]}
):
TitlecasePlugin().received_info_handler(given)
assert given == expected
@pytest.mark.parametrize("config, given, expected", item_test_cases)
class TitlecasePluginTest(PluginMixin):
plugin = "titlecase"
preload_plugin = False
def test_titlecase_fields(self, config, given, expected):
with self.configure_plugin(config):
TitlecasePlugin.titlecase_fields(given)
assert given == expected
def test_cli(self, config, given, expected):
with self.configure_plugin(config):
def test_cli_write(self):
given = Item(
album="retrodelica 2: back 2 the future",
artist="blue planet corporation",
title="generator",
)
expected = Item(
album="Retrodelica 2: Back 2 the Future",
artist="Blue Planet Corporation",
title="Generator",
)
cfg = {"fields": ["album", "artist", "title"]}
with self.configure_plugin(cfg):
given.add(self.lib)
self.run_command("titlecase")
output = self.run_with_output("ls")
@ -315,3 +342,25 @@ class TitlecasePluginTest(PluginMixin):
== f"{expected.artist} - {expected.album} - {expected.title}\n"
)
self.run_command("remove", expected.artist, "-f")
def test_cli_no_write(self):
# Runs the titlecase command with "-p" and checks that the library
# listing still shows the original, un-titlecased values.
# NOTE(review): "-p" is presumably a pretend/dry-run flag - confirm.
given = Item(
album="retrodelica 2: back 2 the future",
artist="blue planet corporation",
title="generator",
)
# Deliberately identical to `given`: no change is expected.
expected = Item(
album="retrodelica 2: back 2 the future",
artist="blue planet corporation",
title="generator",
)
cfg = {"fields": ["album", "artist", "title"]}
with self.configure_plugin(cfg):
given.add(self.lib)
self.run_command("-p", "titlecase")
output = self.run_with_output("ls")
assert (
output
== f"{expected.artist} - {expected.album} - {expected.title}\n"
)
# Remove the item again after the assertion.
self.run_command("remove", expected.artist, "-f")