mirror of
https://github.com/beetbox/beets.git
synced 2025-12-28 11:32:30 +01:00
Merge pull request #3554 from thejli21/master
Fix #3535 - Genius Null Check
This commit is contained in:
commit
e0fc7b18da
4 changed files with 342 additions and 1 deletions
|
|
@ -373,7 +373,14 @@ class Genius(Backend):
|
|||
|
||||
# At least Genius is nice and has a tag called 'lyrics'!
|
||||
# Updated css where the lyrics are based in HTML.
|
||||
lyrics = html.find("div", class_="lyrics").get_text()
|
||||
lyrics_div = html.find("div", class_="lyrics")
|
||||
|
||||
# nullcheck
|
||||
if lyrics_div is None:
|
||||
self._log.debug(u'Genius lyrics for {0} not found',
|
||||
page_url)
|
||||
return None
|
||||
lyrics = lyrics_div.get_text()
|
||||
|
||||
return lyrics
|
||||
|
||||
|
|
|
|||
|
|
@ -178,6 +178,8 @@ Fixes:
|
|||
* Removed the ``@classmethod`` decorator from ``dbcore.query.NoneQuery.match``, which was
|
||||
failing with AttributeError when called. It is now an instance method.
|
||||
:bug:`3516` :bug:`3517`
|
||||
* :doc:`/plugins/lyrics`: Tolerate missing lyrics div in Genius scraper
|
||||
:bug:`3535`
|
||||
|
||||
For plugin developers:
|
||||
|
||||
|
|
|
|||
270
test/rsrc/lyrics/geniuscom/sample.txt
Normal file
270
test/rsrc/lyrics/geniuscom/sample.txt
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="snarly apple_music_player--enabled bagon_song_page--enabled song_stories_public_launch--enabled react_forums--disabled" xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml" lang="en" xml:lang="en">
|
||||
<head>
|
||||
<base target='_top' href="//g-example.com/">
|
||||
|
||||
<script type="text/javascript">
|
||||
//<![CDATA[
|
||||
|
||||
var _sf_startpt=(new Date()).getTime();
|
||||
if (window.performance && performance.mark) {
|
||||
window.performance.mark('parse_start');
|
||||
}
|
||||
|
||||
//]]>
|
||||
</script>
|
||||
|
||||
<title>SAMPLE – SONG Lyrics | g-example Lyrics</title>
|
||||
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<meta content='width=device-width,initial-scale=1' name='viewport'>
|
||||
|
||||
<meta property="og:site_name" content="g-example"/>
|
||||
|
||||
<link title="g-example" type="application/opensearchdescription+xml" rel="search" href="https://g-example.com/opensearch.xml">
|
||||
|
||||
<script async src="https://www.youtube.com/iframe_api"></script>
|
||||
<script defer src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
|
||||
|
||||
<meta content="https://g-example.com/SAMPLE-SONG-lyrics" property="og:url" />
|
||||
|
||||
<link href="ios-app://#/g-example/songs/#" rel="alternate" />
|
||||
<meta content="/songs/3113595" name="newrelic-resource-path" />
|
||||
<link href="https://g-example.com/SAMPLE-SONG-lyrics" rel="canonical" />
|
||||
<link href="https://g-example.com/amp/SAMPLE-SONG-lyrics" rel="amphtml" />
|
||||
|
||||
<script type="text/javascript">
|
||||
var _qevents = _qevents || [];
|
||||
(function() {
|
||||
var elem = document.createElement('script');
|
||||
elem.src = (document.location.protocol == 'https:' ? 'https://secure' : 'http://edge') + '.quantserve.com/quant.js';
|
||||
elem.async = true;
|
||||
elem.type = 'text/javascript';
|
||||
var scpt = document.getElementsByTagName('script')[0];
|
||||
scpt.parentNode.insertBefore(elem, scpt);
|
||||
})();
|
||||
</script>
|
||||
|
||||
<script type="text/javascript">
|
||||
window.ga = window.ga || function() {
|
||||
(window.ga.q = window.ga.q || []).push(arguments);
|
||||
};
|
||||
|
||||
|
||||
(function(g, e, n, i, u, s) {
|
||||
g['GoogleAnalyticsObject'] = 'ga';
|
||||
g.ga.l = Date.now();
|
||||
u = e.createElement(n);
|
||||
s = e.getElementsByTagName(n)[0];
|
||||
u.async = true;
|
||||
u.src = i;
|
||||
s.parentNode.insertBefore(u, s);
|
||||
})(window, document, 'script', 'https://www.google-analytics.com/analytics.js');
|
||||
|
||||
ga('create', "UA-10346621-1", 'auto', {'useAmpClientId': true});
|
||||
ga('set', 'dimension1', "false");
|
||||
ga('set', 'dimension2', "songs#show");
|
||||
ga('set', 'dimension3', "r-b");
|
||||
ga('set', 'dimension4', "true");
|
||||
ga('set', 'dimension5', 'false');
|
||||
ga('set', 'dimension6', "none");
|
||||
ga('send', 'pageview');
|
||||
|
||||
</script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
<div class="header" ng-controller="HeaderALBUM as header_ALBUM" click-outside="close_mobile_subnav_menu()">
|
||||
<div class="header-primary active">
|
||||
<div class="header-expand_nav_menu" ng-click="toggle_mobile_subnav_menu()"><div class="header-expand_nav_menu-contents"></div></div>
|
||||
|
||||
|
||||
<div class="logo_container">
|
||||
<a href="https://g-example.com/" class="logo_link">g-example</a>
|
||||
</div>
|
||||
|
||||
|
||||
<header-actions></header-actions>
|
||||
|
||||
<search-form search-style="header"></search-form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<routable-page>
|
||||
<ng-non-bindable>
|
||||
|
||||
<div class="header_with_cover_art">
|
||||
<div class="header_with_cover_art-inner column_layout">
|
||||
<div class="column_layout-column_span column_layout-column_span--primary">
|
||||
<div class="header_with_cover_art-cover_art ">
|
||||
<div class="cover_art">
|
||||
<img alt="#" class="cover_art-image" src="#" srcset="#" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="header_with_cover_art-primary_info_container">
|
||||
<div class="header_with_cover_art-primary_info">
|
||||
<h1 class="header_with_cover_art-primary_info-title ">SONG</h1>
|
||||
<h2>
|
||||
<a href="https://g-example.com/artists/SAMPLE" class="header_with_cover_art-primary_info-primary_artist">
|
||||
SAMPLE
|
||||
</a>
|
||||
</h2>
|
||||
<h3>
|
||||
<div class="metadata_unit ">
|
||||
<span class="metadata_unit-label">Produced by</span>
|
||||
<span class="metadata_unit-info">
|
||||
<a href="https://g-example.com/artists/Person1">Person 1</a> & <a href="https://g-example.com/artists/Person 2">Person 2</a>
|
||||
</span>
|
||||
</div>
|
||||
</h3>
|
||||
<h3>
|
||||
<div class="metadata_unit ">
|
||||
<span class="metadata_unit-label">Album</span>
|
||||
<span class="metadata_unit-info"><a href="https://g-example.com/albums/SAMPLE/ALBUM">ALBUM</a></span>
|
||||
</div>
|
||||
</h3>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="song_body column_layout" initial-content-for="song_body">
|
||||
<div class="column_layout-column_span column_layout-column_span--primary">
|
||||
<div class="song_body-lyrics">
|
||||
<h2 class="text_label text_label--gray text_label--x_small_text_size u-top_margin">SONG Lyrics</h2>
|
||||
<div initial-content-for="lyrics">
|
||||
<div class="totally-not-the-lyrics-div">
|
||||
!!!! MISSING LYRICS HERE !!!
|
||||
</div>
|
||||
</div>
|
||||
<div initial-content-for="recirculated_content">
|
||||
<div class="u-xx_large_vertical_margins">
|
||||
<div class="text_label text_label--gray">More on g-example</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="metadata_unit metadata_unit--table_row">
|
||||
<span class="metadata_unit-label">Released by</span>
|
||||
|
||||
<span class="metadata_unit-info">
|
||||
<a href="https://g-example.com/artists/records">Records</a> & <a href="https://g-example.com/artists/Top">Top</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="metadata_unit metadata_unit--table_row">
|
||||
<span class="metadata_unit-label">Mixing</span>
|
||||
<span class="metadata_unit-info">
|
||||
<a href="https://g-example.com/artists/Mixed-by-person">Mixed by Person</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="metadata_unit metadata_unit--table_row">
|
||||
<span class="metadata_unit-label">Recorded At</span>
|
||||
<span class="metadata_unit-info metadata_unit-info--text_only">City, Place</span>
|
||||
</div>
|
||||
|
||||
<div class="metadata_unit metadata_unit--table_row">
|
||||
<span class="metadata_unit-label">Release Date</span>
|
||||
<span class="metadata_unit-info metadata_unit-info--text_only">Feb 30, 1290</span>
|
||||
</div>
|
||||
|
||||
<div class="metadata_unit metadata_unit--table_row">
|
||||
<span class="metadata_unit-label">Interpolated By</span>
|
||||
<span class="metadata_unit-info">
|
||||
|
||||
<div class="u-x_small_bottom_margin">
|
||||
<a href="#"> # </a>
|
||||
</div>
|
||||
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div initial-content-for="album">
|
||||
<div class="u-xx_large_vertical_margins">
|
||||
<div class="song_album u-bottom_margin">
|
||||
<a href="https://g-example.com/albums/SAMPLE/ALBUM" class="song_album-album_art" title="ALBUM">
|
||||
<img alt="#" src="#" srcset="#"/>
|
||||
</a>
|
||||
<div class="song_album-info">
|
||||
<a href="https://g-example.com/albums/SAMPLE/ALBUM" title="ALBUM" class="song_album-info-title">
|
||||
ALBUM
|
||||
</a>
|
||||
<a href="https://g-example.com/artists/SAMPLE" class="song_album-info-artist" title="ALBUM">SAMPLE</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</ng-non-bindable>
|
||||
</routable-page>
|
||||
|
||||
<div class="page_footer page_footer--padding-for-sticky-player">
|
||||
<div class="footer">
|
||||
<div>
|
||||
<a href="/about">About g-example</a>
|
||||
<a href="/contributor_guidelines">Contributor Guidelines</a>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<span>g-example</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="text/javascript">_qevents.push({ qacct: "################"});</script>
|
||||
<noscript>
|
||||
<div style="display: none;">
|
||||
<img src="#" height="1" width="1" alt="#"/>
|
||||
</div>
|
||||
</noscript>
|
||||
|
||||
<script type="text/javascript">
|
||||
var _sf_async_config={};
|
||||
|
||||
_sf_async_config.uid = 3877;
|
||||
_sf_async_config.domain = 'g-example.com';
|
||||
_sf_async_config.title = 'SAMPLE – SONG Lyrics | g-example Lyrics';
|
||||
_sf_async_config.sections = 'songs,tag:r-b';
|
||||
_sf_async_config.authors = 'SAMPLE';
|
||||
|
||||
var _cbq = window._cbq || [];
|
||||
|
||||
(function(){
|
||||
function loadChartbeat() {
|
||||
window._sf_endpt=(new Date()).getTime();
|
||||
var e = document.createElement('script');
|
||||
e.setAttribute('language', 'javascript');
|
||||
e.setAttribute('type', 'text/javascript');
|
||||
e.setAttribute('src', '#');
|
||||
document.body.appendChild(e);
|
||||
}
|
||||
var oldonload = window.onload;
|
||||
window.onload = (typeof window.onload != 'function') ?
|
||||
loadChartbeat : function() { oldonload(); loadChartbeat(); };
|
||||
})();
|
||||
</script>
|
||||
|
||||
<!-- Begin comScore Tag -->
|
||||
<script>
|
||||
var _comscore = _comscore || [];
|
||||
_comscore.push({ c1: "2", c2: "17151659" });
|
||||
(function() {
|
||||
var s = document.createElement("script"), el = document.getElementsByTagName("script")[0]; s.async = true;
|
||||
s.src = (document.location.protocol == "https:" ? "https://sb" : "http://b") + ".scorecardresearch.com/beacon.js";
|
||||
el.parentNode.insertBefore(s, el);
|
||||
})();
|
||||
</script>
|
||||
<noscript>
|
||||
<img src="#"/>
|
||||
</noscript>
|
||||
<!-- End comScore Tag -->
|
||||
<noscript>
|
||||
<img height="1" width="1" style="display:none" src="#"/>
|
||||
</noscript>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -39,6 +39,7 @@ from mock import MagicMock
|
|||
log = logging.getLogger('beets.test_lyrics')
|
||||
raw_backend = lyrics.Backend({}, log)
|
||||
google = lyrics.Google(MagicMock(), log)
|
||||
genius = lyrics.Genius(MagicMock(), log)
|
||||
|
||||
|
||||
class LyricsPluginTest(unittest.TestCase):
|
||||
|
|
@ -214,6 +215,33 @@ class MockFetchUrl(object):
|
|||
return content
|
||||
|
||||
|
||||
class GeniusMockGet(object):
    """Callable stand-in for ``requests.get`` serving canned Genius data.

    Two kinds of call are distinguished by the ``headers`` argument:

    * a call with truthy ``headers`` is answered with a small JSON document
      that carries the path of the song page;
    * any other call is answered with the contents of the local fixture
      file that ``url_to_filename(url)`` maps the URL to.
    """

    def __init__(self, pathval='fetched_path'):
        # Stored for callers; not read anywhere inside this class.
        self.pathval = pathval
        # URL of the most recent page (non-API) request, None until then.
        self.fetched = None

    def __call__(self, url, headers=False):
        """Return a ``requests`` ``Response`` for ``url``.

        :param url: URL being requested.
        :param headers: truthy for the first (API) request, falsy for the
            second (HTML page) request.
        :returns: a ``Response`` with status 200 whose body is either the
            canned JSON or the fixture file contents.
        """
        from requests.models import Response

        response = Response()
        response.status_code = 200

        # for the first requests.get() return a path
        if headers:
            response._content = (
                b'{"meta":{"status":200},'
                b'"response":{"song":{"path":"/lyrics/sample"}}}'
            )
            return response

        # for the second requests.get() return the genius page
        self.fetched = url
        # Populate this instance only.  Assigning a PropertyMock to
        # ``type(response).text`` would replace the property on the
        # Response class itself and leak into every other Response.
        with open(url_to_filename(url), 'rb') as f:
            response._content = f.read()
        # NOTE(review): fixture files are assumed to be UTF-8 -- confirm.
        response.encoding = 'utf-8'
        return response
|
||||
|
||||
|
||||
def is_lyrics_content_ok(title, text):
|
||||
"""Compare lyrics text to expected lyrics for given title."""
|
||||
if not text:
|
||||
|
|
@ -395,6 +423,40 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest):
|
|||
google.is_page_candidate(url, url_title, s['title'], u'Sunn O)))')
|
||||
|
||||
|
||||
class LyricsGeniusBaseTest(unittest.TestCase):
    """Base class for Genius tests; skips when prerequisites are missing."""

    def setUp(self):
        """Set up configuration."""
        # Probe for Beautiful Soup without binding the module to a name.
        try:
            __import__('bs4')
        except ImportError:
            have_bs4 = False
        else:
            have_bs4 = True

        if not have_bs4:
            self.skipTest('Beautiful Soup 4 not available')

        # Interpreters older than 2.7.3 are skipped as well.
        parser_too_old = sys.version_info[:3] < (2, 7, 3)
        if parser_too_old:
            self.skipTest("Python's built-in HTML parser is not good enough")
|
||||
|
||||
|
||||
class LyricsGeniusScrapTest(LyricsGeniusBaseTest):
    """Checks that Genius backend works as intended.
    """
    # Imported in the class body so that the name ``requests`` is available
    # to the ``patch.object`` decorator below.
    import requests

    def setUp(self):
        """Set up configuration"""
        LyricsGeniusBaseTest.setUp(self)
        self.plugin = lyrics.LyricsPlugin()

    @patch.object(requests, 'get', GeniusMockGet())
    def test_no_lyrics_div(self):
        """Ensure that `lyrics_from_song_api_path` doesn't crash when the
        html for a Genius page does not contain <div class="lyrics"></div>
        """
        # https://github.com/beetbox/beets/issues/3535
        # With the lyrics div missing, the backend is expected to return
        # None instead of raising.
        self.assertIsNone(genius.lyrics_from_song_api_path('/nolyric'))
|
||||
|
||||
|
||||
class SlugTests(unittest.TestCase):
|
||||
|
||||
def test_slug(self):
|
||||
|
|
|
|||
Loading…
Reference in a new issue