Merge branch 'master' into import-filefilter

Conflicts:
	test/test_importer.py
This commit is contained in:
Malte Ried 2014-12-31 17:04:16 +01:00
commit ab996a0de0
9 changed files with 99 additions and 50 deletions

View file

@ -1313,10 +1313,12 @@ def log_files(session, task):
return
if isinstance(task, SingletonImportTask):
log.info(displayable_path(task.item['path']))
log.info(
'Singleton: {0}'.format(displayable_path(task.item['path'])))
elif task.items:
log.info('Album {0}'.format(displayable_path(task.paths[0])))
for item in task.items:
log.info(displayable_path(item['path']))
log.info(' {0}'.format(displayable_path(item['path'])))
def group_albums(session):

View file

@ -41,6 +41,10 @@ PYLAST_EXCEPTIONS = (
pylast.NetworkError,
)
REPLACE = {
u'\u2010': '-',
}
def deduplicate(seq):
"""Remove duplicates from sequence while preserving order.
@ -235,10 +239,13 @@ class LastGenrePlugin(plugins.BeetsPlugin):
# Cached entity lookups.
def _cached_lookup(self, entity, method, *args):
def _last_lookup(self, entity, method, *args):
"""Get a genre based on the named entity using the callable `method`
whose arguments are given in the sequence `args`. The genre lookup
is cached based on the entity name and the arguments.
is cached based on the entity name and the arguments. Before the
lookup, each argument has some Unicode characters replaced with
rough ASCII equivalents in order to return better results from the
Last.fm database.
"""
# Shortcut if we're missing metadata.
if any(not s for s in args):
@ -248,32 +255,38 @@ class LastGenrePlugin(plugins.BeetsPlugin):
if key in self._genre_cache:
return self._genre_cache[key]
else:
genre = self.fetch_genre(method(*args))
args_replaced = []
for arg in args:
for k, v in REPLACE.items():
arg = arg.replace(k, v)
args_replaced.append(arg)
genre = self.fetch_genre(method(*args_replaced))
self._genre_cache[key] = genre
return genre
def fetch_album_genre(self, obj):
"""Return the album genre for this Item or Album.
"""
return self._cached_lookup(u'album', LASTFM.get_album, obj.albumartist,
obj.album)
return self._last_lookup(u'album', LASTFM.get_album, obj.albumartist,
obj.album)
def fetch_album_artist_genre(self, obj):
"""Return the album artist genre for this Item or Album.
"""
return self._cached_lookup(u'artist', LASTFM.get_artist,
obj.albumartist)
return self._last_lookup(u'artist', LASTFM.get_artist,
obj.albumartist)
def fetch_artist_genre(self, item):
"""Returns the track artist genre for this Item.
"""
return self._cached_lookup(u'artist', LASTFM.get_artist, item.artist)
return self._last_lookup(u'artist', LASTFM.get_artist, item.artist)
def fetch_track_genre(self, obj):
"""Returns the track genre for this Item.
"""
return self._cached_lookup(u'track', LASTFM.get_track, obj.artist,
obj.title)
return self._last_lookup(u'track', LASTFM.get_track, obj.artist,
obj.title)
def _get_genre(self, obj):
"""Get the genre string for an Album or Item object based on

View file

@ -90,7 +90,7 @@ def extract_text_between(html, start_marker, end_marker):
html, _ = html.split(end_marker, 1)
except ValueError:
return u''
return _scrape_strip_cruft(html, True)
return html
def extract_text_in(html, starttag):
@ -124,8 +124,7 @@ def extract_text_in(html, starttag):
else:
print('no closing tag found!')
return
lyrics = ''.join(parts)
return _scrape_strip_cruft(lyrics, True)
return u''.join(parts)
def search_pairs(item):
@ -221,7 +220,7 @@ def fetch_lyricswiki(artist, title):
if not html:
return
lyrics = extract_text_in(html, "<div class='lyricbox'>")
lyrics = extract_text_in(html, u"<div class='lyricbox'>")
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
return lyrics
@ -360,13 +359,14 @@ def _scrape_strip_cruft(html, plain_text_out=False):
html = COMMENT_RE.sub('', html)
html = TAG_RE.sub('', html)
# Strip lines
html = '\n'.join([x.strip() for x in html.strip().split('\n')])
html = re.sub(r'\n{3,}', r'\n\n', html)
return html
def _scrape_merge_paragraphs(html):
return re.sub(r'</p>\s*<p(\s*[^>]*)>', '\n', html)
html = re.sub(r'</p>\s*<p(\s*[^>]*)>', '\n', html)
return re.sub(r'<div .*>\s*</div>', '\n', html)
def scrape_lyrics_from_html(html):
@ -541,4 +541,4 @@ class LyricsPlugin(plugins.BeetsPlugin):
if lyrics:
log.debug(u'got lyrics from backend: {0}'
.format(backend.__name__))
return lyrics.strip()
return _scrape_strip_cruft(lyrics, True)

View file

@ -30,6 +30,8 @@ New:
Fixed:
* :doc:`/plugins/lyrics`: Avoid fetching truncated lyrics from the Google
backend by merging text blocks separated by empty ``<div>`` before scraping.
* Fix a new crash with the latest version of Mutagen (1.26).
* We now print a better error message when the database file is corrupted.
* :doc:`/plugins/discogs`: Only prompt for authentication when running the
@ -64,6 +66,8 @@ Fixed:
the import process. Thanks to :user:`Freso`. :bug:`1176` :bug:`1172`
* :doc:`/plugins/ftintitle`: Fix weird behavior when the same artist appears
twice in the artist string. Thanks to Marc Addeo. :bug:`1179` :bug:`1181`
* :doc:`/plugins/lastgenre`: Match songs more robustly when they contain
dashes. Thanks to :user:`djl`. :bug:`1156`
.. _API changes: http://developer.echonest.com/forums/thread/3650
.. _Plex: https://plex.tv/

View file

@ -120,6 +120,8 @@ def has_program(cmd, args=['--version']):
stdout=devnull, stdin=devnull)
except OSError:
return False
except subprocess.CalledProcessError:
return False
else:
return True

View file

@ -222,7 +222,9 @@ e9.size = "120x600, 160x600";
<h2>John Doe <br> beets song lyrics</h2>
<img src="images/phone-left.gif" alt="Ringtones left icon" width="16" height="17"> <a href="http://www.ringtonematcher.com/go/?sid=LBSMros&amp;artist=The+John Doe&amp;song=Beets+Song" target="_blank"><b><font size="+1" color="red" face="arial">Send "beets song" Ringtone to your Cell</font></b></a> <img src="images/phone-right.gif" alt="Ringtones right icon" width="16" height="17"><br><br><center>Beets is the media library management system for obsessive-compulsive music geeks.<br>
The purpose of beets is to get your music collection right once and for all. It catalogs your collection, automatically improving its metadata as it goes. It then provides a bouquet of tools for manipulating and accessing your music.<br>
Here's an example of beets' brainy tag corrector doing its thing:</center>
<div class='flow breaker'> </div>
Here's an example of beets' brainy tag corrector doing its thing:
Because beets is designed as a library, it can do almost anything you can imagine for your music collection. Via plugins, beets becomes a panacea</center>
<img src="images/phone-left.gif" alt="Ringtones left icon" width="16" height="17"> <a href="http://www.ringtonematcher.com/go/?sid=LBSMros&amp;artist=The+John Doe&amp;song=Beets+Song" target="_blank"><b><font size="+1" color="red" face="arial">Send "beets song" Ringtone to your Cell</font></b></a> <img src="images/phone-right.gif" alt="Ringtones right icon" width="16" height="17"><br><br>
<center>
<font color="black" size="2" face="arial">Share <strong>beets song lyrics</strong></font><br><p style="height: 1px; margin: 3pt; padding: 0pt;"></p>

View file

@ -1,3 +1,8 @@
Beets_song:
- geeks
- bouquet
- panacea
Amsterdam:
- oriflammes
- fortune

View file

@ -1542,7 +1542,8 @@ class ImportPretendTest(_common.TestCase, ImportHelper):
def setUp(self):
super(ImportPretendTest, self).setUp()
self.setup_beets()
self._create_import_dir(1)
self.__create_import_dir()
self.__create_empty_import_dir()
self._setup_import_session()
config['import']['pretend'] = True
self.matcher = AutotagStub().install()
@ -1552,37 +1553,29 @@ class ImportPretendTest(_common.TestCase, ImportHelper):
self.teardown_beets()
self.matcher.restore()
def test_import_pretend(self):
def __create_import_dir(self):
self._create_import_dir(1)
resource_path = os.path.join(_common.RSRC, u'empty.mp3')
single_path = os.path.join(self.import_dir, u'track_2.mp3')
shutil.copy(resource_path, single_path)
import_files = [
self.import_paths = [
os.path.join(self.import_dir, u'the_album'),
single_path
]
self._setup_import_session(singletons=True)
self.importer.paths = import_files
self.import_files = [
displayable_path(
os.path.join(self.import_paths[0], u'track_1.mp3')),
displayable_path(single_path)
]
with capture_log() as logs:
self.importer.run()
logs = [line for line in logs if not line.startswith('Sending event:')]
self.assertEqual(len(self.lib.items()), 0)
self.assertEqual(len(self.lib.albums()), 0)
self.assertEqual(len(logs), 2)
self.assertEqual(logs[0], os.path.join(import_files[0],
u'track_1.mp3'))
self.assertEqual(logs[1], import_files[1])
def test_import_pretend_empty(self):
def __create_empty_import_dir(self):
path = os.path.join(self.temp_dir, 'empty')
os.makedirs(path)
self.empty_path = path
self._setup_import_session(singletons=True)
self.importer.paths = [path]
def __run(self, import_paths, singletons=True):
self._setup_import_session(singletons=singletons)
self.importer.paths = import_paths
with capture_log() as logs:
self.importer.run()
@ -1592,9 +1585,29 @@ class ImportPretendTest(_common.TestCase, ImportHelper):
self.assertEqual(len(self.lib.items()), 0)
self.assertEqual(len(self.lib.albums()), 0)
self.assertEqual(len(logs), 1)
self.assertEqual(logs[0], 'No files imported from {0}'
.format(displayable_path(path)))
return logs
def test_import_singletons_pretend(self):
logs = self.__run(self.import_paths)
self.assertEqual(logs, [
'Singleton: %s' % self.import_files[0],
'Singleton: %s' % self.import_paths[1]])
def test_import_album_pretend(self):
logs = self.__run(self.import_paths, singletons=False)
self.assertEqual(logs, [
'Album %s' % displayable_path(self.import_paths[0]),
' %s' % self.import_files[0],
'Album %s' % displayable_path(self.import_paths[1]),
' %s' % self.import_paths[1]])
def test_import_pretend_empty(self):
logs = self.__run([self.empty_path])
self.assertEqual(logs, ['No files imported from {0}'
.format(displayable_path(self.empty_path))])
def suite():

View file

@ -300,6 +300,15 @@ class LyricsGooglePluginTest(unittest.TestCase):
lyrics.LyricsPlugin()
lyrics.fetch_url = MockFetchUrl()
def test_mocked_source_ok(self):
"""Test that lyrics of the mocked page are correctly scraped"""
url = self.source['url'] + self.source['path']
if os.path.isfile(url_to_filename(url)):
res = lyrics.scrape_lyrics_from_html(lyrics.fetch_url(url))
self.assertTrue(lyrics.is_lyrics(res), url)
self.assertTrue(is_lyrics_content_ok(self.source['title'], res),
url)
def test_google_sources_ok(self):
"""Test if lyrics present on websites registered in beets google custom
search engine are correctly scraped."""
@ -319,12 +328,11 @@ class LyricsGooglePluginTest(unittest.TestCase):
for (fun, s) in zip([lyrics.fetch_lyricswiki,
lyrics.fetch_lyricscom,
lyrics.fetch_musixmatch], DEFAULT_SOURCES):
if os.path.isfile(url_to_filename(
s['url'] + s['path'])):
url = s['url'] + s['path']
if os.path.isfile(url_to_filename(url)):
res = fun(s['artist'], s['title'])
self.assertTrue(lyrics.is_lyrics(res))
self.assertTrue(is_lyrics_content_ok(
s['title'], res))
self.assertTrue(lyrics.is_lyrics(res), url)
self.assertTrue(is_lyrics_content_ok(s['title'], res), url)
def test_is_page_candidate_exact_match(self):
"""Test matching html page title with song infos -- when song infos are