From f94d2767f9919dd933da0ba870355babd29b371e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?=
Date: Fri, 6 Sep 2024 12:11:01 +0100
Subject: [PATCH] Use a single slug implementation

Tidy up 'Google.is_page_candidate' method and remove 'Google.slugify'
method, which was a duplicate of 'slug'.

Since 'GeniusFetchTest' only tested whether the artist name is cleaned
up (the rest of the functionality is patched), remove it and move its
test cases to the 'test_slug' test.
---
 beetsplug/lyrics.py         | 33 ++++++++-------------------------
 test/plugins/test_lyrics.py |  4 ----
 2 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 2ec362356..097110e13 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -23,7 +23,6 @@ import math
 import os.path
 import re
 import struct
-import unicodedata
 from contextlib import contextmanager, suppress
 from dataclasses import dataclass
 from functools import cached_property, partial, total_ordering
@@ -224,7 +223,7 @@ def search_pairs(item):
     return itertools.product(artists, multi_titles)
 
 
-def slug(text):
+def slug(text: str) -> str:
     """Make a URL-safe, human-readable version of the given text
 
     This will do the following:
@@ -234,10 +233,6 @@ def slug(text):
     3. strip whitespace
     4. replace other non-word characters with dashes
     5. strip extra dashes
-
-    This somewhat duplicates the :func:`Google.slugify` function but
-    slugify is not as generic as this one, which can be reused
-    elsewhere.
     """
     return re.sub(r"\W+", "-", unidecode(text).lower().strip()).strip("-")
 
@@ -745,19 +740,6 @@ class Google(SearchBackend):
             self.debug("Bad triggers detected: {}", bad_triggers_occ)
         return len(bad_triggers_occ) < 2
 
-    def slugify(self, text):
-        """Normalize a string and remove non-alphanumeric characters."""
-        text = re.sub(r"[-'_\s]", "_", text)
-        text = re.sub(r"_+", "_", text).strip("_")
-        pat = r"([^,\(]*)\((.*?)\)"  # Remove content within parentheses
-        text = re.sub(pat, r"\g<1>", text).strip()
-        try:
-            text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore")
-            text = str(re.sub(r"[-\s]+", " ", text.decode("utf-8")))
-        except UnicodeDecodeError:
-            self.debug("Failed to normalize '{}'", text)
-        return text
-
     BY_TRANS = ["by", "par", "de", "von"]
     LYRICS_TRANS = ["lyrics", "paroles", "letras", "liedtexte"]
 
@@ -767,23 +749,24 @@ class Google(SearchBackend):
         """Return True if the URL title makes it a good candidate to be a
         page that contains lyrics of title by artist.
         """
-        title_slug = self.slugify(title.lower())
-        url_title_slug = self.slugify(url_title.lower())
+        title_slug = slug(title)
+        url_title_slug = slug(url_title)
         if title_slug in url_title_slug:
            return True
 
-        artist = self.slugify(artist.lower())
+        artist = slug(artist)
         sitename = urlparse(url_link).netloc
 
         # or try extracting song title from URL title and check if
         # they are close enough
         tokens = (
-            [by + "_" + artist for by in self.BY_TRANS]
+            [by + "-" + artist for by in self.BY_TRANS]
             + [artist, sitename, sitename.replace("www.", "")]
             + self.LYRICS_TRANS
         )
-        tokens = [re.escape(t) for t in tokens]
-        song_title = re.sub("(%s)" % "|".join(tokens), "", url_title_slug)
+        song_title = re.sub(
+            "(%s)" % "|".join(tokens), "", url_title_slug
+        ).strip("-")
 
         return self.check_match(artist, title_slug, artist, song_title)
 
diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py
index 9e24d46c2..1d264338a 100644
--- a/test/plugins/test_lyrics.py
+++ b/test/plugins/test_lyrics.py
@@ -370,10 +370,6 @@ the following form.
     def test_bad_lyrics(self, backend, lyrics):
         assert not backend.is_lyrics(lyrics)
 
-    def test_slugify(self, backend):
-        text = "http://site.com/\xe7afe-au_lait(boisson)"
-        assert backend.slugify(text) == "http://site.com/cafe_au_lait"
-
 
 class TestGeniusLyrics(LyricsBackendTest):
     @pytest.fixture(scope="class")
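
For context, a minimal sketch of what the shared slug() helper produces after this
change. The slug() body is copied from the patch above, the unidecode import mirrors
the one the plugin already uses, and the sample inputs (a variation on the string
from the removed test_slugify case, plus a made-up URL title) are illustrative only,
not part of the patch.

    import re

    from unidecode import unidecode  # same third-party dependency the plugin uses


    def slug(text: str) -> str:
        # Transliterate to ASCII, lowercase, collapse each run of non-word
        # characters into a single dash, and trim leading/trailing dashes.
        return re.sub(r"\W+", "-", unidecode(text).lower().strip()).strip("-")


    # A variation on the string exercised by the removed test_slugify case:
    assert slug("\xe7afe au lait (boisson)") == "cafe-au-lait-boisson"

    # BY_TRANS tokens are now joined with "-" so that "by-<artist>" lines up with
    # the dash-separated output of slug() when stripping the artist from a URL title:
    assert slug("Lyrics by The Beatles") == "lyrics-by-the-beatles"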