diff --git a/beets/importer.py b/beets/importer.py index e383b5aa0..c3c0fcb28 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1313,10 +1313,12 @@ def log_files(session, task): return if isinstance(task, SingletonImportTask): - log.info(displayable_path(task.item['path'])) + log.info( + 'Singleton: {0}'.format(displayable_path(task.item['path']))) elif task.items: + log.info('Album {0}'.format(displayable_path(task.paths[0]))) for item in task.items: - log.info(displayable_path(item['path'])) + log.info(' {0}'.format(displayable_path(item['path']))) def group_albums(session): diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index ea0537988..109aaae3c 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -41,6 +41,10 @@ PYLAST_EXCEPTIONS = ( pylast.NetworkError, ) +REPLACE = { + u'\u2010': '-', +} + def deduplicate(seq): """Remove duplicates from sequence wile preserving order. @@ -235,10 +239,13 @@ class LastGenrePlugin(plugins.BeetsPlugin): # Cached entity lookups. - def _cached_lookup(self, entity, method, *args): + def _last_lookup(self, entity, method, *args): """Get a genre based on the named entity using the callable `method` whose arguments are given in the sequence `args`. The genre lookup - is cached based on the entity name and the arguments. + is cached based on the entity name and the arguments. Before the + lookup, each argument is has some Unicode characters replaced with + rough ASCII equivalents in order to return better results from the + Last.fm database. """ # Shortcut if we're missing metadata. if any(not s for s in args): @@ -248,32 +255,38 @@ class LastGenrePlugin(plugins.BeetsPlugin): if key in self._genre_cache: return self._genre_cache[key] else: - genre = self.fetch_genre(method(*args)) + args_replaced = [] + for arg in args: + for k, v in REPLACE.items(): + arg = arg.replace(k, v) + args_replaced.append(arg) + + genre = self.fetch_genre(method(*args_replaced)) self._genre_cache[key] = genre return genre def fetch_album_genre(self, obj): """Return the album genre for this Item or Album. """ - return self._cached_lookup(u'album', LASTFM.get_album, obj.albumartist, - obj.album) + return self._last_lookup(u'album', LASTFM.get_album, obj.albumartist, + obj.album) def fetch_album_artist_genre(self, obj): """Return the album artist genre for this Item or Album. """ - return self._cached_lookup(u'artist', LASTFM.get_artist, - obj.albumartist) + return self._last_lookup(u'artist', LASTFM.get_artist, + obj.albumartist) def fetch_artist_genre(self, item): """Returns the track artist genre for this Item. """ - return self._cached_lookup(u'artist', LASTFM.get_artist, item.artist) + return self._last_lookup(u'artist', LASTFM.get_artist, item.artist) def fetch_track_genre(self, obj): """Returns the track genre for this Item. """ - return self._cached_lookup(u'track', LASTFM.get_track, obj.artist, - obj.title) + return self._last_lookup(u'track', LASTFM.get_track, obj.artist, + obj.title) def _get_genre(self, obj): """Get the genre string for an Album or Item object based on diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 448399fe0..a2ebe7c36 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -90,7 +90,7 @@ def extract_text_between(html, start_marker, end_marker): html, _ = html.split(end_marker, 1) except ValueError: return u'' - return _scrape_strip_cruft(html, True) + return html def extract_text_in(html, starttag): @@ -124,8 +124,7 @@ def extract_text_in(html, starttag): else: print('no closing tag found!') return - lyrics = ''.join(parts) - return _scrape_strip_cruft(lyrics, True) + return u''.join(parts) def search_pairs(item): @@ -221,7 +220,7 @@ def fetch_lyricswiki(artist, title): if not html: return - lyrics = extract_text_in(html, "
") + lyrics = extract_text_in(html, u"
") if lyrics and 'Unfortunately, we are not licensed' not in lyrics: return lyrics @@ -360,13 +359,14 @@ def _scrape_strip_cruft(html, plain_text_out=False): html = COMMENT_RE.sub('', html) html = TAG_RE.sub('', html) - # Strip lines html = '\n'.join([x.strip() for x in html.strip().split('\n')]) + html = re.sub(r'\n{3,}', r'\n\n', html) return html def _scrape_merge_paragraphs(html): - return re.sub(r'

\s*]*)>', '\n', html) + html = re.sub(r'

\s*]*)>', '\n', html) + return re.sub(r'
\s*
', '\n', html) def scrape_lyrics_from_html(html): @@ -541,4 +541,4 @@ class LyricsPlugin(plugins.BeetsPlugin): if lyrics: log.debug(u'got lyrics from backend: {0}' .format(backend.__name__)) - return lyrics.strip() + return _scrape_strip_cruft(lyrics, True) diff --git a/docs/changelog.rst b/docs/changelog.rst index 8446646e5..0446300cc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -30,6 +30,8 @@ New: Fixed: +* :doc:`/plugins/lyrics`: Avoid fetching truncated lyrics from the Google + backed by merging text blocks separated by empty ``
`` before scraping. * Fix a new crash with the latest version of Mutagen (1.26). * We now print a better error message when the database file is corrupted. * :doc:`/plugins/discogs`: Only prompt for authentication when running the @@ -64,6 +66,8 @@ Fixed: the import process. Thanks to :user:`Freso`. :bug:`1176` :bug:`1172` * :doc:`/plugins/ftintitle`: Fix weird behavior when the same artist appears twice in the artist string. Thanks to Marc Addeo. :bug:`1179` :bug:`1181` +* :doc:`/plugins/lastgenre`: Match songs more robustly when they contain + dashes. Thanks to :user:`djl`. :bug:`1156` .. _API changes: http://developer.echonest.com/forums/thread/3650 .. _Plex: https://plex.tv/ diff --git a/test/helper.py b/test/helper.py index 4cd7e8ef2..459e643a0 100644 --- a/test/helper.py +++ b/test/helper.py @@ -120,6 +120,8 @@ def has_program(cmd, args=['--version']): stdout=devnull, stdin=devnull) except OSError: return False + except subprocess.CalledProcessError: + return False else: return True diff --git a/test/rsrc/lyrics/examplecom/beetssong.txt b/test/rsrc/lyrics/examplecom/beetssong.txt index ad97db206..3bba9f702 100644 --- a/test/rsrc/lyrics/examplecom/beetssong.txt +++ b/test/rsrc/lyrics/examplecom/beetssong.txt @@ -222,7 +222,9 @@ e9.size = "120x600, 160x600";

John Doe
beets song lyrics

Ringtones left icon Send "beets song" Ringtone to your Cell Ringtones right icon

Beets is the media library management system for obsessive-compulsive music geeks.
The purpose of beets is to get your music collection right once and for all. It catalogs your collection, automatically improving its metadata as it goes. It then provides a bouquet of tools for manipulating and accessing your music.
-Here's an example of beets' brainy tag corrector doing its thing:
+
+Here's an example of beets' brainy tag corrector doing its thing: +Because beets is designed as a library, it can do almost anything you can imagine for your music collection. Via plugins, beets becomes a panacea Ringtones left icon Send "beets song" Ringtone to your Cell Ringtones right icon

Share beets song lyrics

diff --git a/test/rsrc/lyricstext.yaml b/test/rsrc/lyricstext.yaml index 70ce384fb..814c207df 100644 --- a/test/rsrc/lyricstext.yaml +++ b/test/rsrc/lyricstext.yaml @@ -1,3 +1,8 @@ +Beets_song: + - geeks + - bouquet + - panacea + Amsterdam: - oriflammes - fortune diff --git a/test/test_importer.py b/test/test_importer.py index 37f091b7d..89a65b74f 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1542,7 +1542,8 @@ class ImportPretendTest(_common.TestCase, ImportHelper): def setUp(self): super(ImportPretendTest, self).setUp() self.setup_beets() - self._create_import_dir(1) + self.__create_import_dir() + self.__create_empty_import_dir() self._setup_import_session() config['import']['pretend'] = True self.matcher = AutotagStub().install() @@ -1552,37 +1553,29 @@ class ImportPretendTest(_common.TestCase, ImportHelper): self.teardown_beets() self.matcher.restore() - def test_import_pretend(self): + def __create_import_dir(self): + self._create_import_dir(1) resource_path = os.path.join(_common.RSRC, u'empty.mp3') single_path = os.path.join(self.import_dir, u'track_2.mp3') - shutil.copy(resource_path, single_path) - import_files = [ + self.import_paths = [ os.path.join(self.import_dir, u'the_album'), single_path ] - self._setup_import_session(singletons=True) - self.importer.paths = import_files + self.import_files = [ + displayable_path( + os.path.join(self.import_paths[0], u'track_1.mp3')), + displayable_path(single_path) + ] - with capture_log() as logs: - self.importer.run() - - logs = [line for line in logs if not line.startswith('Sending event:')] - - self.assertEqual(len(self.lib.items()), 0) - self.assertEqual(len(self.lib.albums()), 0) - - self.assertEqual(len(logs), 2) - self.assertEqual(logs[0], os.path.join(import_files[0], - u'track_1.mp3')) - self.assertEqual(logs[1], import_files[1]) - - def test_import_pretend_empty(self): + def __create_empty_import_dir(self): path = os.path.join(self.temp_dir, 'empty') os.makedirs(path) + self.empty_path = path - self._setup_import_session(singletons=True) - self.importer.paths = [path] + def __run(self, import_paths, singletons=True): + self._setup_import_session(singletons=singletons) + self.importer.paths = import_paths with capture_log() as logs: self.importer.run() @@ -1592,9 +1585,29 @@ class ImportPretendTest(_common.TestCase, ImportHelper): self.assertEqual(len(self.lib.items()), 0) self.assertEqual(len(self.lib.albums()), 0) - self.assertEqual(len(logs), 1) - self.assertEqual(logs[0], 'No files imported from {0}' - .format(displayable_path(path))) + return logs + + def test_import_singletons_pretend(self): + logs = self.__run(self.import_paths) + + self.assertEqual(logs, [ + 'Singleton: %s' % self.import_files[0], + 'Singleton: %s' % self.import_paths[1]]) + + def test_import_album_pretend(self): + logs = self.__run(self.import_paths, singletons=False) + + self.assertEqual(logs, [ + 'Album %s' % displayable_path(self.import_paths[0]), + ' %s' % self.import_files[0], + 'Album %s' % displayable_path(self.import_paths[1]), + ' %s' % self.import_paths[1]]) + + def test_import_pretend_empty(self): + logs = self.__run([self.empty_path]) + + self.assertEqual(logs, ['No files imported from {0}' + .format(displayable_path(self.empty_path))]) def suite(): diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 605e4c6e7..492229630 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -300,6 +300,15 @@ class LyricsGooglePluginTest(unittest.TestCase): lyrics.LyricsPlugin() lyrics.fetch_url = MockFetchUrl() + def test_mocked_source_ok(self): + """Test that lyrics of the mocked page are correctly scraped""" + url = self.source['url'] + self.source['path'] + if os.path.isfile(url_to_filename(url)): + res = lyrics.scrape_lyrics_from_html(lyrics.fetch_url(url)) + self.assertTrue(lyrics.is_lyrics(res), url) + self.assertTrue(is_lyrics_content_ok(self.source['title'], res), + url) + def test_google_sources_ok(self): """Test if lyrics present on websites registered in beets google custom search engine are correctly scraped.""" @@ -319,12 +328,11 @@ class LyricsGooglePluginTest(unittest.TestCase): for (fun, s) in zip([lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom, lyrics.fetch_musixmatch], DEFAULT_SOURCES): - if os.path.isfile(url_to_filename( - s['url'] + s['path'])): + url = s['url'] + s['path'] + if os.path.isfile(url_to_filename(url)): res = fun(s['artist'], s['title']) - self.assertTrue(lyrics.is_lyrics(res)) - self.assertTrue(is_lyrics_content_ok( - s['title'], res)) + self.assertTrue(lyrics.is_lyrics(res), url) + self.assertTrue(is_lyrics_content_ok(s['title'], res), url) def test_is_page_candidate_exact_match(self): """Test matching html page title with song infos -- when song infos are