mirror of
https://github.com/beetbox/beets.git
synced 2026-01-07 16:34:45 +01:00
Google: prioritise Songlyrics and AZlyrics sources
This commit is contained in:
parent
70554640e5
commit
07d372c13d
1 changed file with 15 additions and 1 deletion
|
|
@@ -28,7 +28,7 @@ from functools import cached_property, partial, total_ordering
|
|||
from html import unescape
|
||||
from http import HTTPStatus
|
||||
from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator, NamedTuple
|
||||
from urllib.parse import quote, urlencode
|
||||
from urllib.parse import quote, urlencode, urlparse
|
||||
|
||||
import requests
|
||||
from unidecode import unidecode
|
||||
|
|
@@ -497,6 +497,10 @@ class SearchResult(NamedTuple):
|
|||
title: str
|
||||
url: str
|
||||
|
||||
@property
def source(self) -> str:
    """Return the network location (host, plus port if present) of ``url``."""
    parsed_url = urlparse(self.url)
    return parsed_url.netloc
|
||||
|
||||
|
||||
class SearchBackend(SoupMixin, Backend):
|
||||
REQUIRES_BS = True
|
||||
|
|
@@ -656,6 +660,8 @@ class Google(SearchBackend):
|
|||
#: Split cleaned up URL title into artist and title parts.
|
||||
URL_TITLE_PARTS_RE = re.compile(r" +(?:[ :|-]+|par|by) +")
|
||||
|
||||
#: Per-host sort weight used by ``get_results``: results whose host has a
#: lower value are yielded first; hosts not listed here are treated as 1.
SOURCE_DIST_FACTOR = {"www.azlyrics.com": 0.5, "www.songlyrics.com": 0.6}
|
||||
|
||||
@classmethod
|
||||
def pre_process_html(cls, html: str) -> str:
|
||||
"""Pre-process the HTML content before scraping."""
|
||||
|
|
@@ -724,6 +730,14 @@ class Google(SearchBackend):
|
|||
for item in data.get("items", []):
|
||||
yield self.make_search_result(artist, title, item)
|
||||
|
||||
def get_results(self, *args) -> Iterable[SearchResult]:
    """Yield the parent backend's results, preferred sources first.

    Results are ordered by the ``SOURCE_DIST_FACTOR`` value of their
    ``source`` host (ascending); hosts without an entry count as 1. The
    sort is stable, so results that share a weight keep the order in which
    the parent implementation produced them.
    """

    def source_weight(candidate: SearchResult) -> float:
        return self.SOURCE_DIST_FACTOR.get(candidate.source, 1)

    # All parent results are collected before any is yielded, because they
    # have to be sorted first.
    ranked = sorted(super().get_results(*args), key=source_weight)
    yield from ranked
|
||||
|
||||
@classmethod
|
||||
def scrape(cls, html: str) -> str | None:
|
||||
# Get the longest text element (if any).
|
||||
|
|
|
|||
Loading…
Reference in a new issue