mirror of
https://github.com/beetbox/beets.git
synced 2026-01-15 12:41:22 +01:00
Merge pull request #992 from KraYmer/lyrics-coverage
Improve lyrics coverage
This commit is contained in:
commit
e6bf8c21d8
6 changed files with 415 additions and 16 deletions
|
|
@ -23,6 +23,7 @@ import json
|
|||
import unicodedata
|
||||
import difflib
|
||||
import itertools
|
||||
from HTMLParser import HTMLParseError
|
||||
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets import ui
|
||||
|
|
@ -271,8 +272,9 @@ def is_page_candidate(urlLink, urlTitle, title, artist):
|
|||
tokens = [by + '_' + artist for by in BY_TRANS] + \
|
||||
[artist, sitename, sitename.replace('www.', '')] + LYRICS_TRANS
|
||||
songTitle = re.sub(u'(%s)' % u'|'.join(tokens), u'', urlTitle)
|
||||
songTitle = songTitle.strip('_|')
|
||||
typoRatio = .9
|
||||
|
||||
typoRatio = .8
|
||||
return difflib.SequenceMatcher(None, songTitle, title).ratio() >= typoRatio
|
||||
|
||||
|
||||
|
|
@ -364,8 +366,12 @@ def scrape_lyrics_from_html(html):
|
|||
html = _scrape_merge_paragraphs(html)
|
||||
|
||||
# extract all long text blocks that are not code
|
||||
soup = BeautifulSoup(html, "html.parser",
|
||||
parse_only=SoupStrainer(text=is_text_notcode))
|
||||
try:
|
||||
soup = BeautifulSoup(html, "html.parser",
|
||||
parse_only=SoupStrainer(text=is_text_notcode))
|
||||
except HTMLParseError:
|
||||
return None
|
||||
|
||||
soup = sorted(soup.stripped_strings, key=len)[-1]
|
||||
|
||||
return soup
|
||||
|
|
|
|||
9
setup.py
9
setup.py
|
|
@ -85,12 +85,13 @@ setup(
|
|||
+ (['ordereddict'] if sys.version_info < (2, 7, 0) else []),
|
||||
|
||||
tests_require=[
|
||||
'responses',
|
||||
'pyechonest',
|
||||
'mock',
|
||||
'beautifulsoup4',
|
||||
'flask',
|
||||
'rarfile',
|
||||
'mock',
|
||||
'pyechonest',
|
||||
'pylast',
|
||||
'rarfile',
|
||||
'responses',
|
||||
],
|
||||
|
||||
# Plugin (optional) dependencies:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,341 @@
|
|||
|
||||
|
||||
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head id="ctl00_Head1"><meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" /><title>
|
||||
Ben & Ellen Harper City Of Dreams Lyrics - Onelyrics.net
|
||||
</title><meta name="keywords" content="Ben &amp; Ellen Harper City Of Dreams, Ben &amp; Ellen Harper City Of Dreams Lyrics, Ben &amp; Ellen Harper City Of Dreams Song Lyrics, Ben &amp; Ellen Harper City Of Dreams Song Text, Ben &amp; Ellen Harper City Of Dreams Şarkı Sözü" /><meta name="description" content="Ben & Ellen Harper City Of Dreams Lyrics. Day breaks over the city of my childhood Daybreak over the city I called home Where the sage met the..." /><meta name="robots" content="index,follow" /><meta name="robots" content="NOODP" /><link rel="icon" href="/favicon.ico" type="image/x-icon" /><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><link href="/App_Themes/default/css/Interface.css" rel="stylesheet" type="text/css" /><link rel="search" type="application/opensearchdescription+xml" title="Onelyrics.net - Search Lyrics" href="/xml/search.xml" /><link rel="alternate" type="application/rss+xml" title="Onelyrics.net - New Song Lyrics" href="/xml/new_lyrics.xml" />
|
||||
<link href="http://www.onelyrics.net/ben-ellen-harper-city-of-dreams-lyrics" rel="canonical" /></head>
|
||||
<body>
|
||||
<form name="aspnetForm" method="post" action="/ben-ellen-harper-city-of-dreams-lyrics" id="aspnetForm">
|
||||
<div>
|
||||
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKMTExNDA4MzEzOGRkBfqlfuuQKa7LybXM/GprsFkj1mSg9qg8V6w++Om/EM0=" />
|
||||
</div>
|
||||
|
||||
<div>
|
||||
|
||||
<input type="hidden" name="__VIEWSTATEGENERATOR" id="__VIEWSTATEGENERATOR" value="67C2BABB" />
|
||||
</div>
|
||||
<div class="center">
|
||||
<div id="header">
|
||||
<div class="logo">
|
||||
<a href="/" title="lyrics">ONE<strong>LYRICS</strong>
|
||||
</a>
|
||||
</div>
|
||||
<div class="header_alt">
|
||||
<ul class="ortamenu">
|
||||
<li class='ilk'><a href="/"
|
||||
title="lyrics">
|
||||
<img src="/App_Themes/default/img/home.gif" alt="lyrics" />
|
||||
</a></li>
|
||||
<li><a href="/new-song-lyrics"
|
||||
title="new song lyrics">New Song Lyrics</a></li>
|
||||
<li class="son"><a href="/populer-song-lyrics"
|
||||
title="populer song lyrics">Populer Song Lyrics</a></li>
|
||||
</ul>
|
||||
<div class="arama">
|
||||
<script>
|
||||
(function () {
|
||||
var cx = '004449022147236721955:rwdr_-ykwwg';
|
||||
var gcse = document.createElement('script');
|
||||
gcse.type = 'text/javascript';
|
||||
gcse.async = true;
|
||||
gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
|
||||
'//www.google.com/cse/cse.js?cx=' + cx;
|
||||
var s = document.getElementsByTagName('script')[0];
|
||||
s.parentNode.insertBefore(gcse, s);
|
||||
})();
|
||||
</script>
|
||||
<gcse:search></gcse:search>
|
||||
</div>
|
||||
<div class="clr"></div>
|
||||
</div>
|
||||
<div class="alfabe">
|
||||
<a href="/a-song-lyrics.html" title="A" class="ilk">A</a>
|
||||
<a href="/b-song-lyrics.html" title="B">B</a>
|
||||
<a href="/c-song-lyrics.html" title="C">C</a>
|
||||
<a href="/d-song-lyrics.html" title="D">D</a>
|
||||
<a href="/e-song-lyrics.html" title="E">E</a>
|
||||
<a href="/f-song-lyrics.html" title="F">F</a>
|
||||
<a href="/g-song-lyrics.html" title="G">G</a>
|
||||
<a href="/h-song-lyrics.html" title="H">H</a>
|
||||
<a href="/i-song-lyrics.html" title="I">I</a>
|
||||
<a href="/j-song-lyrics.html" title="J">J</a>
|
||||
<a href="/k-song-lyrics.html" title="K">K</a>
|
||||
<a href="/l-song-lyrics.html" title="L">L</a>
|
||||
<a href="/m-song-lyrics.html" title="M">M</a>
|
||||
<a href="/n-song-lyrics.html" title="N">N</a>
|
||||
<a href="/o-song-lyrics.html" title="O">O</a>
|
||||
<a href="/p-song-lyrics.html" title="P">P</a>
|
||||
<a href="/q-song-lyrics.html" title="Q">Q</a>
|
||||
<a href="/r-song-lyrics.html" title="R">R</a>
|
||||
<a href="/s-song-lyrics.html" title="S">S</a>
|
||||
<a href="/t-song-lyrics.html" title="T">T</a>
|
||||
<a href="/u-song-lyrics.html" title="U">U</a>
|
||||
<a href="/v-song-lyrics.html" title="V">V</a>
|
||||
<a href="/w-song-lyrics.html" title="W">W</a>
|
||||
<a href="/x-song-lyrics.html" title="X">X</a>
|
||||
<a href="/y-song-lyrics.html" title="Y">Y</a>
|
||||
<a href="/z-song-lyrics.html" title="Z">Z</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="txtcenter">
|
||||
<script async src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
|
||||
<!-- 728x90 Resim -->
|
||||
<ins class="adsbygoogle"
|
||||
style="display: inline-block; width: 728px; height: 90px"
|
||||
data-ad-client="ca-pub-3379066124362506"
|
||||
data-ad-slot="6181228355"></ins>
|
||||
<script>
|
||||
(adsbygoogle = window.adsbygoogle || []).push({});
|
||||
</script>
|
||||
</div>
|
||||
<div id="main">
|
||||
<p>
|
||||
<a href="http://feeds.feedburner.com/Onelyricsnet-NewSongLyrics" target="_blank">
|
||||
<img src="http://feeds.feedburner.com/~fc/Onelyricsnet-NewSongLyrics?bg=ea5f24&fg=ffffff&anim=0" height="26" width="88" style="border: 0" alt="Onelyricsnet-NewSongLyrics" /></a>
|
||||
</p>
|
||||
|
||||
<div id="sag">
|
||||
|
||||
<div class="reklam">
|
||||
|
||||
<script async src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
|
||||
<!-- 336x280ResimMetin -->
|
||||
<ins class="adsbygoogle"
|
||||
style="display: inline-block; width: 336px; height: 280px"
|
||||
data-ad-client="ca-pub-3379066124362506"
|
||||
data-ad-slot="3608404357"></ins>
|
||||
<script>
|
||||
(adsbygoogle = window.adsbygoogle || []).push({});
|
||||
</script>
|
||||
</div>
|
||||
|
||||
<div class="anabaslik">
|
||||
Latest Updates Lyrics
|
||||
</div>
|
||||
|
||||
<ul class="populer_lyrics">
|
||||
|
||||
<li>
|
||||
|
||||
<a href="jason-aldean-dont-change-gone-lyrics" title="Jason Aldean – Don't Change Gone Lyrics">
|
||||
Jason Aldean – Don't Change Gone Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="rapsody-godzilla-lyrics" title="Rapsody – Godzilla Lyrics">
|
||||
Rapsody – Godzilla Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="jason-aldean-i-took-it-with-me-lyrics" title="Jason Aldean – I Took It With Me Lyrics">
|
||||
Jason Aldean – I Took It With Me Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="david-archuleta-glorious-lyrics" title="David Archuleta – Glorious Lyrics">
|
||||
David Archuleta – Glorious Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="jason-aldean-if-my-truck-could-talk-lyrics" title="Jason Aldean – If My Truck Could Talk Lyrics">
|
||||
Jason Aldean – If My Truck Could Talk Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="tydi-perfect-crush-lyrics" title="TyDi – Perfect Crush Lyrics">
|
||||
TyDi – Perfect Crush Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="jason-aldean-laid-back-lyrics" title="Jason Aldean – Laid Back Lyrics">
|
||||
Jason Aldean – Laid Back Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="jason-aldean-miss-that-girl-lyrics" title="Jason Aldean – Miss That Girl Lyrics">
|
||||
Jason Aldean – Miss That Girl Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="childish-gambino-go-dj-lyrics" title="Childish Gambino – Go DJ Lyrics">
|
||||
Childish Gambino – Go DJ Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
|
||||
<a href="vybz-kartel-well-make-it-lyrics" title="Vybz Kartel – We'll Make It Lyrics">
|
||||
Vybz Kartel – We'll Make It Lyrics
|
||||
</a>
|
||||
</li>
|
||||
|
||||
|
||||
<li class="hepsi">
|
||||
<a href="/new-song-lyrics" title="all wiev updates song lyrics">all wiev updates song lyrics>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="sol">
|
||||
|
||||
|
||||
|
||||
<div class="solreklam">
|
||||
<script async src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
|
||||
<!-- 300x600ResimMetin -->
|
||||
<ins class="adsbygoogle"
|
||||
style="display: inline-block; width: 300px; height: 600px"
|
||||
data-ad-client="ca-pub-3379066124362506"
|
||||
data-ad-slot="6547325555"></ins>
|
||||
<script>
|
||||
(adsbygoogle = window.adsbygoogle || []).push({});
|
||||
</script>
|
||||
</div>
|
||||
<div class="sag_sarki">
|
||||
|
||||
<div class="baslik">
|
||||
<div class="breadcrumb" itemprop="breadcrumb" itemscope itemtype="http://schema.org/WebPage">
|
||||
<a href="/" title="Lyrics">Lyrics</a> > <a href="ben-ellen-harper-city-of-dreams-lyrics" title="Ben & Ellen Harper City Of Dreams Lyrics"
|
||||
class="breadcrumb_aktif">
|
||||
Ben & Ellen Harper City Of Dreams Lyrics</a>
|
||||
</div>
|
||||
|
||||
<h1 title="Ben & Ellen Harper City Of Dreams Lyrics">
|
||||
Ben & Ellen Harper City Of Dreams Lyrics</h1>
|
||||
</div>
|
||||
<div class="icerik">
|
||||
|
||||
Day breaks over the city of my childhood
|
||||
<br>Daybreak over the city I called home
|
||||
<br>Where the sage met the sea and the groves were sweet and green
|
||||
<br>It's a city that lives only in my dreams
|
||||
<br>
|
||||
<br>The groves where we played when we were children
|
||||
<br>The groves where we fooled around as teens
|
||||
<br>Those green groves are paved from la to santa fe
|
||||
<br>That city lives only in my dreams
|
||||
<br>
|
||||
<br>Landmarks lost to parking lots in the city I called home
|
||||
<br>Looking back I see what used to be
|
||||
<br>Now freeways crawl though the suburban sprawl
|
||||
<br>As far as the eye can see
|
||||
<br>And the city lives only in my dreams
|
||||
<br>
|
||||
<br>Twilight shades the valley of my memory
|
||||
<br>When citrus groves still perfumed the sky
|
||||
<br>But I guess those orange blossoms weren't so special after all
|
||||
<br>Now it's a city of days gone by
|
||||
<br>
|
||||
<br>Landmarks lost to parking lots in the city I called home
|
||||
<br>Looking back I see what used to be
|
||||
<br>Now freeways crawl through the suburban sprawl
|
||||
<br>As far as the eye can see
|
||||
<br>And the city lives only in my dreams
|
||||
|
||||
<div class='tags'><b>Tags</b><br/><a href='/lyrics.aspx?q=Ben+%26amp%3b+Ellen+Harper+City+Of+Dreams' title='Ben & Ellen Harper City Of Dreams'>Ben & Ellen Harper City Of Dreams</a> <a href='/lyrics.aspx?q=Ben+%26amp%3b+Ellen+Harper+City+Of+Dreams+Lyrics' title='Ben & Ellen Harper City Of Dreams Lyrics'>Ben & Ellen Harper City Of Dreams Lyrics</a> <a href='/lyrics.aspx?q=Ben+%26amp%3b+Ellen+Harper+City+Of+Dreams+Song+Lyrics' title='Ben & Ellen Harper City Of Dreams Song Lyrics'>Ben & Ellen Harper City Of Dreams Song Lyrics</a> <a href='/lyrics.aspx?q=Ben+%26amp%3b+Ellen+Harper+City+Of+Dreams+Song+Text' title='Ben & Ellen Harper City Of Dreams Song Text'>Ben & Ellen Harper City Of Dreams Song Text</a> <a href='/lyrics.aspx?q=Ben+%26amp%3b+Ellen+Harper+City+Of+Dreams+%c5%9eark%c4%b1+S%c3%b6z%c3%bc' title='Ben & Ellen Harper City Of Dreams Şarkı Sözü'>Ben & Ellen Harper City Of Dreams Şarkı Sözü</a> </div>
|
||||
|
||||
<div class="clr"></div>
|
||||
<div class="sarki_alt">
|
||||
|
||||
<div class="icerik_tarih">
|
||||
May 12, 2014
|
||||
</div>
|
||||
<div class="icerik_hit">
|
||||
126 hits
|
||||
</div>
|
||||
|
||||
<div class="clr"></div>
|
||||
|
||||
<div class="sosyalaglar">
|
||||
|
||||
<div class="sosyalaglar_li">
|
||||
<!-- Google +1 Butonu -->
|
||||
<script type="text/javascript" src="https://apis.google.com/js/plusone.js" async="true"> { lang: 'en' }</script>
|
||||
<g:plusone size="tall"></g:plusone>
|
||||
<!-- Google +1 Butonu End-->
|
||||
</div>
|
||||
|
||||
<div class="sosyalaglar_li">
|
||||
<!--Twitter-->
|
||||
<a href="http://twitter.com/share" class="twitter-share-button" data-count="vertical">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script>
|
||||
<!--Twitter End-->
|
||||
</div>
|
||||
|
||||
<div class="sosyalaglar_li">
|
||||
<!--Facebook-->
|
||||
<iframe src="http://www.facebook.com/plugins/like.php?href=http://www.onelyrics.net/ben-ellen-harper-city-of-dreams-lyrics&layout=box_count&show_faces=true&width=450&action=like&colorscheme=light&height=65"
|
||||
scrolling="no" frameborder="0" style="border: none; overflow: hidden; width: 62px; height: 65px;"
|
||||
allowtransparency="true"></iframe>
|
||||
<!--Facebook End-->
|
||||
</div>
|
||||
</div>
|
||||
<div class="clr"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="clr">
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div id="footer">
|
||||
<div class="center footer">
|
||||
<div class="copyright">
|
||||
Copyright © 2014 Onelyrics.net / All rights reserved.
|
||||
</div>
|
||||
<div class="clr"></div>
|
||||
|
||||
<ul class="altmenu2">
|
||||
<li>
|
||||
<a href="/" title="lyrics"><strong>lyrics</strong></a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/new-song-lyrics" title="song lyrics"><em>song lyrics</em></a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/" title="şarkı sözleri">şarkı sözleri</a>
|
||||
</li>
|
||||
<li class="son">
|
||||
<a href="https://plus.google.com/+OnelyricsNet-SongLyrics?rel=author" title="Google" target="_blank">Google</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<div class="clr">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script type="text/javascript">
|
||||
(function (i, s, o, g, r, a, m) {
|
||||
i['GoogleAnalyticsObject'] = r; i[r] = i[r] || function () {
|
||||
(i[r].q = i[r].q || []).push(arguments)
|
||||
}, i[r].l = 1 * new Date(); a = s.createElement(o),
|
||||
m = s.getElementsByTagName(o)[0]; a.async = 1; a.src = g; m.parentNode.insertBefore(a, m)
|
||||
})(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');
|
||||
|
||||
ga('create', 'UA-29965928-11', 'onelyrics.net');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -215,6 +215,34 @@ Hey_it_s_ok: |
|
|||
Hey It's OK, I'ts Ok
|
||||
Cause I've found what i wanted
|
||||
|
||||
City_of_dreams: |
|
||||
Day breaks over the city of my childhood
|
||||
Daybreak over the city I called home
|
||||
Where the sage met the sea and the groves were sweet and green
|
||||
It's a city that lives only in my dreams
|
||||
|
||||
The groves where we played when we were children
|
||||
The groves where we fooled around as teens
|
||||
Those green groves are paved from la to santa fe
|
||||
That city lives only in my dreams
|
||||
|
||||
Landmarks lost to parking lots in the city I called home
|
||||
Looking back I see what used to be
|
||||
Now freeways crawl though the suburban sprawl
|
||||
As far as the eye can see
|
||||
And the city lives only in my dreams
|
||||
|
||||
Twilight shades the valley of my memory
|
||||
When citrus groves still perfumed the sky
|
||||
But I guess those orange blossoms weren't so special after all
|
||||
Now it's a city of days gone by
|
||||
|
||||
Landmarks lost to parking lots in the city I called home
|
||||
Looking back I see what used to be
|
||||
Now freeways crawl through the suburban sprawl
|
||||
As far as the eye can see
|
||||
And the city lives only in my dreams
|
||||
|
||||
missing_texts: |
|
||||
Lyricsmania staff is working hard for you to add $TITLE lyrics as soon
|
||||
as they'll be released by $ARTIST, check back soon!
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import os
|
||||
import _common
|
||||
import sys
|
||||
from _common import unittest
|
||||
from beetsplug import lyrics
|
||||
from beets.library import Item
|
||||
|
|
@ -163,8 +164,7 @@ class MockFetchUrl(object):
|
|||
url = url.replace('http://', '').replace('www.', '')
|
||||
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
|
||||
fn = fn.split('/')
|
||||
fn = os.path.join('rsrc', 'lyrics', fn[0], fn[-1]) + '.txt'
|
||||
|
||||
fn = os.path.join(_common.RSRC, 'lyrics', fn[0], fn[-1]) + '.txt'
|
||||
with open(fn, 'r') as f:
|
||||
content = f.read()
|
||||
return content
|
||||
|
|
@ -186,7 +186,7 @@ def is_lyrics_content_ok(title, text):
|
|||
class LyricsGooglePluginTest(unittest.TestCase):
|
||||
# Every source entered in default beets google custom search engine
|
||||
# must be listed below.
|
||||
# Use default query when possible, or override artist and title field
|
||||
# Use default query when possible, or override artist and title fields
|
||||
# if the website doesn't have lyrics for the default query.
|
||||
sourcesOk = [
|
||||
dict(definfo,
|
||||
|
|
@ -227,6 +227,10 @@ class LyricsGooglePluginTest(unittest.TestCase):
|
|||
dict(definfo,
|
||||
url='http://www.metrolyrics.com/',
|
||||
path='lady-madonna-lyrics-beatles.html'),
|
||||
dict(definfo,
|
||||
url=u'http://www.onelyrics.net/',
|
||||
artist=u'Ben & Ellen Harper', title=u'City of dreams',
|
||||
path='ben-ellen-harper-city-of-dreams-lyrics'),
|
||||
dict(definfo,
|
||||
url=u'http://www.paroles.net/',
|
||||
artist=u'Lilly Wood & the prick', title=u"Hey it's ok",
|
||||
|
|
@ -258,7 +262,8 @@ class LyricsGooglePluginTest(unittest.TestCase):
|
|||
__import__('bs4')
|
||||
except ImportError:
|
||||
self.skipTest('Beautiful Soup 4 not available')
|
||||
|
||||
if sys.version_info[:3] < (2, 7, 3):
|
||||
self.skipTest("Python’s built-in HTML parser is not good enough")
|
||||
lyrics.LyricsPlugin()
|
||||
lyrics.fetch_url = MockFetchUrl()
|
||||
|
||||
|
|
@ -280,7 +285,7 @@ class LyricsGooglePluginTest(unittest.TestCase):
|
|||
self.assertTrue(lyrics.is_lyrics(res), url)
|
||||
self.assertTrue(is_lyrics_content_ok(s['title'], res), url)
|
||||
|
||||
def test_is_page_candidate(self):
|
||||
def test_is_page_candidate_exact_match(self):
|
||||
from bs4 import SoupStrainer, BeautifulSoup
|
||||
|
||||
for s in self.sourcesOk:
|
||||
|
|
@ -292,6 +297,23 @@ class LyricsGooglePluginTest(unittest.TestCase):
|
|||
s['title'], s['artist']),
|
||||
True, url)
|
||||
|
||||
def test_is_page_candidate_fuzzy_match(self):
    """Exercise lyrics.is_page_candidate with near-miss page titles.

    The same URL, song title and artist are checked against three
    different page titles; each page title is paired with the result
    that is_page_candidate is expected to return for it.
    """
    url = u'http://www.example.com/lazy_madonna_beatles'
    title = u'Lady Madonna'
    artist = u'The Beatles'

    # (page title, expected verdict), evaluated in this order.
    cases = (
        # very small diffs (typo) are ok
        (u'example.com | lazy madonna lyrics by the beatles', True),
        # reject different title
        (u'example.com | busy madonna lyrics by the beatles', False),
        # (title, artist) != (artist, title)
        (u'example.com | the beatles lyrics by Lazy Madonna', False),
    )
    for urlTitle, expected in cases:
        verdict = lyrics.is_page_candidate(url, urlTitle, title, artist)
        self.assertEqual(verdict, expected, url)
|
||||
|
||||
|
||||
def suite():
|
||||
return unittest.TestLoader().loadTestsFromName(__name__)
|
||||
|
|
|
|||
9
tox.ini
9
tox.ini
|
|
@ -8,13 +8,14 @@ envlist = py26, py27, pypy, docs, flake8
|
|||
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
mock
|
||||
pylast
|
||||
beautifulsoup4
|
||||
flask
|
||||
responses
|
||||
mock
|
||||
nose
|
||||
pyechonest
|
||||
pylast
|
||||
rarfile
|
||||
responses
|
||||
commands =
|
||||
nosetests {posargs}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue