Fix for Musixmatch multi-part lyrics

Sometimes Musixmatch lyrics come in 2 parts.
Also, sometimes non-existent lyrics return some content.
This commit is contained in:
Evgeniy Gurevich 2021-04-01 14:21:49 +08:00 committed by GitHub
parent eab4372a58
commit a8fe8ce9a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -291,14 +291,25 @@ class MusiXmatch(Backend):
self._log.warning(u'we are blocked at MusixMatch: url %s failed'
% url)
return
html_part = html.split('<p class="mxm-lyrics__content')[-1]
lyrics = extract_text_between(html_part, '>', '</p>')
html_parts = html.split('<p class="mxm-lyrics__content')
# Sometimes lyrics come in 2 parts
if len(html_parts) > 1:
html_part1 = html_parts[-2]
lyrics_part1 = extract_text_between(html_part1, '>', '</p>')
else:
lyrics_part1 = ''
html_part2 = html_parts[-1]
lyrics_part2 = extract_text_between(html_part2, '>', '</p>')
lyrics = lyrics_part1 + '\n' + lyrics_part2
lyrics = lyrics.strip(',"').replace('\\n', '\n')
# another odd case: sometimes only that string remains, for
# missing songs. this seems to happen after being blocked
# above, when filling in the CAPTCHA.
if "Instant lyrics for all your music." in lyrics:
return
# sometimes there are non-existent lyrics with some content
if 'Think is wrong? You can always add the lyrics' in lyrics:
return
return lyrics