From c227501a5d1c6810a7555f1928c5a6401b101799 Mon Sep 17 00:00:00 2001 From: David Logie Date: Sat, 20 Dec 2014 22:10:58 +0000 Subject: [PATCH 01/19] lastgenre: Optionally replace unicode characters when performing lookups. --- beetsplug/lastgenre/__init__.py | 6 ++++++ docs/plugins/lastgenre.rst | 3 +++ 2 files changed, 9 insertions(+) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index ea0537988..2d502e076 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -25,6 +25,8 @@ import pylast import os import yaml +from unidecode import unidecode + from beets import plugins from beets import ui from beets.util import normpath, plurality @@ -133,6 +135,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): 'force': True, 'auto': True, 'separator': u', ', + 'asciify': False, }) self.setup() @@ -244,6 +247,9 @@ class LastGenrePlugin(plugins.BeetsPlugin): if any(not s for s in args): return None + if self.config['asciify']: + args = [unidecode(arg) for arg in args] + key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args)) if key in self._genre_cache: return self._genre_cache[key] diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index 9d3242467..d874a4a72 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -108,6 +108,9 @@ configuration file. The available options are: - **whitelist**: The filename of a custom genre list, ``yes`` to use the internal whitelist, or ``no`` to consider all genres valid. Default: ``yes``. +- **asciify**: Convert all non-ASCII characters to ASCII equivalents + when performing lookups. + Default: ``no``. Running Manually ---------------- From c7b6f75ca8c95b7b547bde9a3d5bb39c53f0bcff Mon Sep 17 00:00:00 2001 From: David Logie Date: Sun, 21 Dec 2014 12:58:15 +0000 Subject: [PATCH 02/19] lastgenre: Automatically retry asciified lookups if the initial lookup fails. --- beetsplug/lastgenre/__init__.py | 12 ++++++------ docs/plugins/lastgenre.rst | 3 --- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 2d502e076..7937db277 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -135,7 +135,6 @@ class LastGenrePlugin(plugins.BeetsPlugin): 'force': True, 'auto': True, 'separator': u', ', - 'asciify': False, }) self.setup() @@ -247,15 +246,16 @@ class LastGenrePlugin(plugins.BeetsPlugin): if any(not s for s in args): return None - if self.config['asciify']: - args = [unidecode(arg) for arg in args] - key = u'{0}.{1}'.format(entity, u'-'.join(unicode(a) for a in args)) if key in self._genre_cache: return self._genre_cache[key] else: - genre = self.fetch_genre(method(*args)) - self._genre_cache[key] = genre + args_ascii = [unidecode(arg) for arg in args] + for arglist in [args, args_ascii]: + genre = self.fetch_genre(method(*arglist)) + self._genre_cache[key] = genre + if genre: + break return genre def fetch_album_genre(self, obj): diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index d874a4a72..9d3242467 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -108,9 +108,6 @@ configuration file. The available options are: - **whitelist**: The filename of a custom genre list, ``yes`` to use the internal whitelist, or ``no`` to consider all genres valid. Default: ``yes``. -- **asciify**: Convert all non-ASCII characters to ASCII equivalents - when performing lookups. - Default: ``no``. Running Manually ---------------- From d9673ef9b5e1456cc5da2a600465e1587ba885df Mon Sep 17 00:00:00 2001 From: David Logie Date: Fri, 26 Dec 2014 14:08:22 +0000 Subject: [PATCH 03/19] lastgenre: Only replace a small set of unicode characters. --- beetsplug/lastgenre/__init__.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 7937db277..8d75c7e4f 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2013, Adrian Sampson. # @@ -25,8 +26,6 @@ import pylast import os import yaml -from unidecode import unidecode - from beets import plugins from beets import ui from beets.util import normpath, plurality @@ -43,6 +42,10 @@ PYLAST_EXCEPTIONS = ( pylast.NetworkError, ) +REPLACE = { + u'‐': '-', +} + def deduplicate(seq): """Remove duplicates from sequence wile preserving order. @@ -250,7 +253,12 @@ class LastGenrePlugin(plugins.BeetsPlugin): if key in self._genre_cache: return self._genre_cache[key] else: - args_ascii = [unidecode(arg) for arg in args] + args_ascii = [] + for arg in args: + for k, v in REPLACE.items(): + arg = arg.replace(k, v) + args_ascii.append(arg) + for arglist in [args, args_ascii]: genre = self.fetch_genre(method(*arglist)) self._genre_cache[key] = genre From 3015c22d403d93002813800911047137d456ca4a Mon Sep 17 00:00:00 2001 From: David Logie Date: Fri, 26 Dec 2014 14:17:49 +0000 Subject: [PATCH 04/19] Fix indent level. --- beetsplug/lastgenre/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 8d75c7e4f..be853c55b 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -43,7 +43,7 @@ PYLAST_EXCEPTIONS = ( ) REPLACE = { - u'‐': '-', + u'‐': '-', } From 433d68724a915f5c5980168e7db987de4a0c9e2e Mon Sep 17 00:00:00 2001 From: David Logie Date: Sun, 28 Dec 2014 18:38:15 +0000 Subject: [PATCH 05/19] Escape Unicode characters so we don't need the encoding headers. --- beetsplug/lastgenre/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index be853c55b..5dbfaefb9 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2013, Adrian Sampson. # @@ -43,7 +42,7 @@ PYLAST_EXCEPTIONS = ( ) REPLACE = { - u'‐': '-', + u'\u2010': '-', } From 148d9048d54cf030432f09d36face43c3a7a589e Mon Sep 17 00:00:00 2001 From: David Logie Date: Sun, 28 Dec 2014 18:38:42 +0000 Subject: [PATCH 06/19] Update _cached_lookup() docstring. --- beetsplug/lastgenre/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 5dbfaefb9..e9dfe634c 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -242,7 +242,9 @@ class LastGenrePlugin(plugins.BeetsPlugin): def _cached_lookup(self, entity, method, *args): """Get a genre based on the named entity using the callable `method` whose arguments are given in the sequence `args`. The genre lookup - is cached based on the entity name and the arguments. + is cached based on the entity name and the arguments. Before the lookup, + each argument is has some Unicode characters replaced with rough ASCII + equivalents in order to return better results from the Last.fm database. """ # Shortcut if we're missing metadata. if any(not s for s in args): From b33d5b577a845b08bbff7a035d47fc73f4f36b51 Mon Sep 17 00:00:00 2001 From: Thomas Scholtes Date: Mon, 29 Dec 2014 12:07:57 +0100 Subject: [PATCH 07/19] Catch exception when program check has non-zero return code See #1175. --- test/helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/helper.py b/test/helper.py index 4cd7e8ef2..459e643a0 100644 --- a/test/helper.py +++ b/test/helper.py @@ -120,6 +120,8 @@ def has_program(cmd, args=['--version']): stdout=devnull, stdin=devnull) except OSError: return False + except subprocess.CalledProcessError: + return False else: return True From 5123a41258e82dcbddf7cf4f7fa44ad5ae886499 Mon Sep 17 00:00:00 2001 From: Malte Ried Date: Mon, 29 Dec 2014 12:54:16 +0100 Subject: [PATCH 08/19] Added a flag `--detailed` to get a more detailed output of the `--pretend` option. --- beets/config_default.yaml | 1 + beets/importer.py | 9 ++++- beets/ui/commands.py | 4 +++ docs/reference/cli.rst | 4 +++ test/test_importer.py | 76 ++++++++++++++++++++++++++++----------- 5 files changed, 72 insertions(+), 22 deletions(-) diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 78f16d051..ae0882542 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -22,6 +22,7 @@ import: flat: no group_albums: no pretend: false + detailed: false clutter: ["Thumbs.DB", ".DS_Store"] ignore: [".*", "*~", "System Volume Information"] diff --git a/beets/importer.py b/beets/importer.py index 0877b5508..632eda30c 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1299,9 +1299,16 @@ def manipulate_files(session, task): def log_files(session, task): """A coroutine (pipeline stage) to log each file which will be imported """ + detailed = config['import']['detailed'].get() if isinstance(task, SingletonImportTask): - log.info(displayable_path(task.item['path'])) + if detailed: + log.info( + 'Singleton: {0}'.format(displayable_path(task.item['path']))) + else: + log.info(displayable_path(task.item['path'])) elif task.items: + if detailed: + log.info('Album {0}'.format(displayable_path(task.paths[0]))) for item in task.items: log.info(displayable_path(item['path'])) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 546fe87d9..8f369a213 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -947,6 +947,10 @@ import_cmd.parser.add_option( '--pretend', dest='pretend', action='store_true', help='just print the files to import' ) +import_cmd.parser.add_option( + '--detailed', dest='detailed', action='store_true', + help='use in conjunction with --pretend to get sophisticated output' +) import_cmd.func = import_func default_commands.append(import_cmd) diff --git a/docs/reference/cli.rst b/docs/reference/cli.rst index e569c073c..baee5423b 100644 --- a/docs/reference/cli.rst +++ b/docs/reference/cli.rst @@ -132,6 +132,10 @@ Optional command flags: option. If set, beets will just print a list of files that it would otherwise import. +* To get a more detailed output with the ``--pretend`` option, you can use the + ``--detailed`` flag. This way you can see which tracks beets think should + belong to an album. + .. _rarfile: https://pypi.python.org/pypi/rarfile/2.2 .. only:: html diff --git a/test/test_importer.py b/test/test_importer.py index 0ffa9aaf3..71059961d 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1542,7 +1542,8 @@ class ImportPretendTest(_common.TestCase, ImportHelper): def setUp(self): super(ImportPretendTest, self).setUp() self.setup_beets() - self._create_import_dir(1) + self.__create_import_dir() + self.__create_empty_import_dir() self._setup_import_session() config['import']['pretend'] = True self.matcher = AutotagStub().install() @@ -1552,35 +1553,31 @@ class ImportPretendTest(_common.TestCase, ImportHelper): self.teardown_beets() self.matcher.restore() - def test_import_pretend(self): + def __create_import_dir(self): + self._create_import_dir(1) resource_path = os.path.join(_common.RSRC, u'empty.mp3') single_path = os.path.join(self.import_dir, u'track_2.mp3') - shutil.copy(resource_path, single_path) - import_files = [ + self.import_paths = [ os.path.join(self.import_dir, u'the_album'), single_path ] - self._setup_import_session(singletons=True) - self.importer.paths = import_files + self.import_files = [ + displayable_path( + os.path.join(self.import_paths[0], u'track_1.mp3')), + displayable_path(single_path) + ] - with capture_log() as logs: - self.importer.run() - - self.assertEqual(len(self.lib.items()), 0) - self.assertEqual(len(self.lib.albums()), 0) - - self.assertEqual(len(logs), 3) - self.assertEqual(logs[1], os.path.join(import_files[0], - u'track_1.mp3')) - self.assertEqual(logs[2], import_files[1]) - - def test_import_pretend_empty(self): + def __create_empty_import_dir(self): path = os.path.join(self.temp_dir, 'empty') os.makedirs(path) + self.empty_path = path - self._setup_import_session(singletons=True) - self.importer.paths = [path] + def __run(self, import_paths, singletons=True, detailed=False): + config['import']['detailed'] = detailed + + self._setup_import_session(singletons=singletons) + self.importer.paths = import_paths with capture_log() as logs: self.importer.run() @@ -1588,9 +1585,46 @@ class ImportPretendTest(_common.TestCase, ImportHelper): self.assertEqual(len(self.lib.items()), 0) self.assertEqual(len(self.lib.albums()), 0) + return logs + + def test_import_pretend(self): + logs = self.__run(self.import_paths) + + self.assertEqual(len(logs), 3) + self.assertEqual(logs[1], self.import_files[0]) + self.assertEqual(logs[2], self.import_files[1]) + + def test_import_pretend_empty(self): + logs = self.__run([self.empty_path]) + self.assertEqual(len(logs), 2) self.assertEqual(logs[1], 'No files imported from {0}' - .format(displayable_path(path))) + .format(displayable_path(self.empty_path))) + + def test_import_singletons_pretend_detailed(self): + logs = self.__run(self.import_paths, detailed=True) + + self.assertEqual(len(logs), 3) + self.assertEqual(logs[1], 'Singleton: %s' % self.import_files[0]) + self.assertEqual(logs[2], 'Singleton: %s' % self.import_paths[1]) + + def test_import_album_pretend_detailed(self): + logs = self.__run(self.import_paths, singletons=False, detailed=True) + + self.assertEqual(len(logs), 5) + self.assertEqual(logs[1], + 'Album %s' % displayable_path(self.import_paths[0])) + self.assertEqual(logs[2], self.import_files[0]) + self.assertEqual(logs[3], + 'Album %s' % displayable_path(self.import_paths[1])) + self.assertEqual(logs[4], self.import_paths[1]) + + def test_import_pretend_empty_detailed(self): + logs = self.__run([self.empty_path], detailed=True) + + self.assertEqual(len(logs), 2) + self.assertEqual(logs[1], 'No files imported from {0}' + .format(displayable_path(self.empty_path))) def suite(): From 023c13d292ee3893d6846f4c00dacfc08e3361f3 Mon Sep 17 00:00:00 2001 From: Malte Ried Date: Mon, 29 Dec 2014 13:22:28 +0100 Subject: [PATCH 09/19] Updated the test code to ignore the "Sending event" log messages. --- test/test_importer.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/test/test_importer.py b/test/test_importer.py index 71059961d..5516d3100 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1582,6 +1582,8 @@ class ImportPretendTest(_common.TestCase, ImportHelper): with capture_log() as logs: self.importer.run() + logs = [line for line in logs if not line.startswith('Sending event:')] + self.assertEqual(len(self.lib.items()), 0) self.assertEqual(len(self.lib.albums()), 0) @@ -1590,41 +1592,35 @@ class ImportPretendTest(_common.TestCase, ImportHelper): def test_import_pretend(self): logs = self.__run(self.import_paths) - self.assertEqual(len(logs), 3) - self.assertEqual(logs[1], self.import_files[0]) - self.assertEqual(logs[2], self.import_files[1]) + self.assertEqual(logs, self.import_files) def test_import_pretend_empty(self): logs = self.__run([self.empty_path]) - self.assertEqual(len(logs), 2) - self.assertEqual(logs[1], 'No files imported from {0}' - .format(displayable_path(self.empty_path))) + self.assertEqual(logs, ['No files imported from {0}' + .format(displayable_path(self.empty_path))]) def test_import_singletons_pretend_detailed(self): logs = self.__run(self.import_paths, detailed=True) - self.assertEqual(len(logs), 3) - self.assertEqual(logs[1], 'Singleton: %s' % self.import_files[0]) - self.assertEqual(logs[2], 'Singleton: %s' % self.import_paths[1]) + self.assertEqual(logs, [ + 'Singleton: %s' % self.import_files[0], + 'Singleton: %s' % self.import_paths[1]]) def test_import_album_pretend_detailed(self): logs = self.__run(self.import_paths, singletons=False, detailed=True) - self.assertEqual(len(logs), 5) - self.assertEqual(logs[1], - 'Album %s' % displayable_path(self.import_paths[0])) - self.assertEqual(logs[2], self.import_files[0]) - self.assertEqual(logs[3], - 'Album %s' % displayable_path(self.import_paths[1])) - self.assertEqual(logs[4], self.import_paths[1]) + self.assertEqual(logs, [ + 'Album %s' % displayable_path(self.import_paths[0]), + self.import_files[0], + 'Album %s' % displayable_path(self.import_paths[1]), + self.import_paths[1]]) def test_import_pretend_empty_detailed(self): logs = self.__run([self.empty_path], detailed=True) - self.assertEqual(len(logs), 2) - self.assertEqual(logs[1], 'No files imported from {0}' - .format(displayable_path(self.empty_path))) + self.assertEqual(logs, ['No files imported from {0}' + .format(displayable_path(self.empty_path))]) def suite(): From f2704461cfd8ba6fe781c608e48ef995616397a3 Mon Sep 17 00:00:00 2001 From: David Logie Date: Sun, 28 Dec 2014 18:39:34 +0000 Subject: [PATCH 10/19] Go back to a single lookup with specific Unicode characters replaced. --- beetsplug/lastgenre/__init__.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index e9dfe634c..c4ec46af1 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -254,17 +254,14 @@ class LastGenrePlugin(plugins.BeetsPlugin): if key in self._genre_cache: return self._genre_cache[key] else: - args_ascii = [] + args_replaced = [] for arg in args: for k, v in REPLACE.items(): arg = arg.replace(k, v) - args_ascii.append(arg) + args_replaced.append(arg) - for arglist in [args, args_ascii]: - genre = self.fetch_genre(method(*arglist)) - self._genre_cache[key] = genre - if genre: - break + genre = self.fetch_genre(method(*args_replaced)) + self._genre_cache[key] = genre return genre def fetch_album_genre(self, obj): From 08d5b9c13860d942df7570186915c87533164ea1 Mon Sep 17 00:00:00 2001 From: David Logie Date: Mon, 29 Dec 2014 19:45:33 +0000 Subject: [PATCH 11/19] Fix line length in docstring. --- beetsplug/lastgenre/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index c4ec46af1..9a6c99585 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -242,9 +242,10 @@ class LastGenrePlugin(plugins.BeetsPlugin): def _cached_lookup(self, entity, method, *args): """Get a genre based on the named entity using the callable `method` whose arguments are given in the sequence `args`. The genre lookup - is cached based on the entity name and the arguments. Before the lookup, - each argument is has some Unicode characters replaced with rough ASCII - equivalents in order to return better results from the Last.fm database. + is cached based on the entity name and the arguments. Before the + lookup, each argument is has some Unicode characters replaced with + rough ASCII equivalents in order to return better results from the + Last.fm database. """ # Shortcut if we're missing metadata. if any(not s for s in args): From fd94094c1b7eb3bf08883839b617b48b86dcfe72 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Tue, 30 Dec 2014 15:27:17 -0400 Subject: [PATCH 12/19] Changelog and name change for #1156 --- beetsplug/lastgenre/__init__.py | 16 ++++++++-------- docs/changelog.rst | 2 ++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 9a6c99585..109aaae3c 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -239,7 +239,7 @@ class LastGenrePlugin(plugins.BeetsPlugin): # Cached entity lookups. - def _cached_lookup(self, entity, method, *args): + def _last_lookup(self, entity, method, *args): """Get a genre based on the named entity using the callable `method` whose arguments are given in the sequence `args`. The genre lookup is cached based on the entity name and the arguments. Before the @@ -268,25 +268,25 @@ class LastGenrePlugin(plugins.BeetsPlugin): def fetch_album_genre(self, obj): """Return the album genre for this Item or Album. """ - return self._cached_lookup(u'album', LASTFM.get_album, obj.albumartist, - obj.album) + return self._last_lookup(u'album', LASTFM.get_album, obj.albumartist, + obj.album) def fetch_album_artist_genre(self, obj): """Return the album artist genre for this Item or Album. """ - return self._cached_lookup(u'artist', LASTFM.get_artist, - obj.albumartist) + return self._last_lookup(u'artist', LASTFM.get_artist, + obj.albumartist) def fetch_artist_genre(self, item): """Returns the track artist genre for this Item. """ - return self._cached_lookup(u'artist', LASTFM.get_artist, item.artist) + return self._last_lookup(u'artist', LASTFM.get_artist, item.artist) def fetch_track_genre(self, obj): """Returns the track genre for this Item. """ - return self._cached_lookup(u'track', LASTFM.get_track, obj.artist, - obj.title) + return self._last_lookup(u'track', LASTFM.get_track, obj.artist, + obj.title) def _get_genre(self, obj): """Get the genre string for an Album or Item object based on diff --git a/docs/changelog.rst b/docs/changelog.rst index 8446646e5..583f51188 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -64,6 +64,8 @@ Fixed: the import process. Thanks to :user:`Freso`. :bug:`1176` :bug:`1172` * :doc:`/plugins/ftintitle`: Fix weird behavior when the same artist appears twice in the artist string. Thanks to Marc Addeo. :bug:`1179` :bug:`1181` +* :doc:`/plugins/lastgenre`: Match songs more robustly when they contain + dashes. Thanks to :user:`djl`. :bug:`1156` .. _API changes: http://developer.echonest.com/forums/thread/3650 .. _Plex: https://plex.tv/ From d4d5c085fafa08a1a8fc92aac6da6bb95a9e7159 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Tue, 30 Dec 2014 23:37:23 +0100 Subject: [PATCH 13/19] lyrics : remove empty divs before scraping it may result in \n being inserted that we will strip in _scrape_strip_cruft --- beetsplug/lyrics.py | 14 +++++++------- test/test_lyrics.py | 9 ++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 448399fe0..a2ebe7c36 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -90,7 +90,7 @@ def extract_text_between(html, start_marker, end_marker): html, _ = html.split(end_marker, 1) except ValueError: return u'' - return _scrape_strip_cruft(html, True) + return html def extract_text_in(html, starttag): @@ -124,8 +124,7 @@ def extract_text_in(html, starttag): else: print('no closing tag found!') return - lyrics = ''.join(parts) - return _scrape_strip_cruft(lyrics, True) + return u''.join(parts) def search_pairs(item): @@ -221,7 +220,7 @@ def fetch_lyricswiki(artist, title): if not html: return - lyrics = extract_text_in(html, "
") + lyrics = extract_text_in(html, u"
") if lyrics and 'Unfortunately, we are not licensed' not in lyrics: return lyrics @@ -360,13 +359,14 @@ def _scrape_strip_cruft(html, plain_text_out=False): html = COMMENT_RE.sub('', html) html = TAG_RE.sub('', html) - # Strip lines html = '\n'.join([x.strip() for x in html.strip().split('\n')]) + html = re.sub(r'\n{3,}', r'\n\n', html) return html def _scrape_merge_paragraphs(html): - return re.sub(r'

\s*]*)>', '\n', html) + html = re.sub(r'

\s*]*)>', '\n', html) + return re.sub(r'
\s*
', '\n', html) def scrape_lyrics_from_html(html): @@ -541,4 +541,4 @@ class LyricsPlugin(plugins.BeetsPlugin): if lyrics: log.debug(u'got lyrics from backend: {0}' .format(backend.__name__)) - return lyrics.strip() + return _scrape_strip_cruft(lyrics, True) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 605e4c6e7..9374f99f5 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -319,12 +319,11 @@ class LyricsGooglePluginTest(unittest.TestCase): for (fun, s) in zip([lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom, lyrics.fetch_musixmatch], DEFAULT_SOURCES): - if os.path.isfile(url_to_filename( - s['url'] + s['path'])): + url = s['url'] + s['path'] + if os.path.isfile(url_to_filename(url)): res = fun(s['artist'], s['title']) - self.assertTrue(lyrics.is_lyrics(res)) - self.assertTrue(is_lyrics_content_ok( - s['title'], res)) + self.assertTrue(lyrics.is_lyrics(res), url) + self.assertTrue(is_lyrics_content_ok(s['title'], res), url) def test_is_page_candidate_exact_match(self): """Test matching html page title with song infos -- when song infos are From c34e718ab6218bc80293584e1eb0fb7c9a716e84 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Wed, 31 Dec 2014 06:15:41 +0100 Subject: [PATCH 14/19] lyrics: introduce empty div in mocked lyrics page --- test/rsrc/lyrics/examplecom/beetssong.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/rsrc/lyrics/examplecom/beetssong.txt b/test/rsrc/lyrics/examplecom/beetssong.txt index ad97db206..3bba9f702 100644 --- a/test/rsrc/lyrics/examplecom/beetssong.txt +++ b/test/rsrc/lyrics/examplecom/beetssong.txt @@ -222,7 +222,9 @@ e9.size = "120x600, 160x600";

John Doe
beets song lyrics

Ringtones left icon Send "beets song" Ringtone to your Cell Ringtones right icon

Beets is the media library management system for obsessive-compulsive music geeks.
The purpose of beets is to get your music collection right once and for all. It catalogs your collection, automatically improving its metadata as it goes. It then provides a bouquet of tools for manipulating and accessing your music.
-Here's an example of beets' brainy tag corrector doing its thing:
+
+Here's an example of beets' brainy tag corrector doing its thing: +Because beets is designed as a library, it can do almost anything you can imagine for your music collection. Via plugins, beets becomes a panacea Ringtones left icon Send "beets song" Ringtone to your Cell Ringtones right icon

Share beets song lyrics

From 38b890cdd6d0187dba8244c9607d242757e77a8d Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Wed, 31 Dec 2014 06:16:06 +0100 Subject: [PATCH 15/19] lyrics: add test checking scraping of mocked page --- test/rsrc/lyricstext.yaml | 5 +++++ test/test_lyrics.py | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/test/rsrc/lyricstext.yaml b/test/rsrc/lyricstext.yaml index 70ce384fb..814c207df 100644 --- a/test/rsrc/lyricstext.yaml +++ b/test/rsrc/lyricstext.yaml @@ -1,3 +1,8 @@ +Beets_song: + - geeks + - bouquet + - panacea + Amsterdam: - oriflammes - fortune diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 9374f99f5..492229630 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -300,6 +300,15 @@ class LyricsGooglePluginTest(unittest.TestCase): lyrics.LyricsPlugin() lyrics.fetch_url = MockFetchUrl() + def test_mocked_source_ok(self): + """Test that lyrics of the mocked page are correctly scraped""" + url = self.source['url'] + self.source['path'] + if os.path.isfile(url_to_filename(url)): + res = lyrics.scrape_lyrics_from_html(lyrics.fetch_url(url)) + self.assertTrue(lyrics.is_lyrics(res), url) + self.assertTrue(is_lyrics_content_ok(self.source['title'], res), + url) + def test_google_sources_ok(self): """Test if lyrics present on websites registered in beets google custom search engine are correctly scraped.""" From 650696a5deb805794fe24297cc2914226490ed93 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Wed, 31 Dec 2014 06:23:39 +0100 Subject: [PATCH 16/19] edit changelog --- docs/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 583f51188..9d8bd00af 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -30,6 +30,8 @@ New: Fixed: +* :doc:`/plugins/lyrics`: Avoid fetching truncated lyrics from the Google +backed by merging text blocks separated by empty ``
`` before scraping. * Fix a new crash with the latest version of Mutagen (1.26). * We now print a better error message when the database file is corrupted. * :doc:`/plugins/discogs`: Only prompt for authentication when running the From e63a8c17a4a2a09d98a4d0886b1ce0df2b94a1b0 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Wed, 31 Dec 2014 06:37:05 +0100 Subject: [PATCH 17/19] fix changelog --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9d8bd00af..0446300cc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -31,7 +31,7 @@ New: Fixed: * :doc:`/plugins/lyrics`: Avoid fetching truncated lyrics from the Google -backed by merging text blocks separated by empty ``
`` before scraping. + backed by merging text blocks separated by empty ``
`` before scraping. * Fix a new crash with the latest version of Mutagen (1.26). * We now print a better error message when the database file is corrupted. * :doc:`/plugins/discogs`: Only prompt for authentication when running the From cc82e1cb43d54ecfa224673f3f342cfef69c836f Mon Sep 17 00:00:00 2001 From: Malte Ried Date: Wed, 31 Dec 2014 11:30:46 +0100 Subject: [PATCH 18/19] Made the detailed output the default behaviour. --- beets/config_default.yaml | 1 - beets/importer.py | 11 +++-------- beets/ui/commands.py | 4 ---- docs/reference/cli.rst | 4 ---- test/test_importer.py | 25 ++++++------------------- 5 files changed, 9 insertions(+), 36 deletions(-) diff --git a/beets/config_default.yaml b/beets/config_default.yaml index ae0882542..78f16d051 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -22,7 +22,6 @@ import: flat: no group_albums: no pretend: false - detailed: false clutter: ["Thumbs.DB", ".DS_Store"] ignore: [".*", "*~", "System Volume Information"] diff --git a/beets/importer.py b/beets/importer.py index 632eda30c..6bf0e91a9 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1299,16 +1299,11 @@ def manipulate_files(session, task): def log_files(session, task): """A coroutine (pipeline stage) to log each file which will be imported """ - detailed = config['import']['detailed'].get() if isinstance(task, SingletonImportTask): - if detailed: - log.info( - 'Singleton: {0}'.format(displayable_path(task.item['path']))) - else: - log.info(displayable_path(task.item['path'])) + log.info( + 'Singleton: {0}'.format(displayable_path(task.item['path']))) elif task.items: - if detailed: - log.info('Album {0}'.format(displayable_path(task.paths[0]))) + log.info('Album {0}'.format(displayable_path(task.paths[0]))) for item in task.items: log.info(displayable_path(item['path'])) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 8f369a213..546fe87d9 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -947,10 +947,6 @@ import_cmd.parser.add_option( '--pretend', dest='pretend', action='store_true', help='just print the files to import' ) -import_cmd.parser.add_option( - '--detailed', dest='detailed', action='store_true', - help='use in conjunction with --pretend to get sophisticated output' -) import_cmd.func = import_func default_commands.append(import_cmd) diff --git a/docs/reference/cli.rst b/docs/reference/cli.rst index baee5423b..e569c073c 100644 --- a/docs/reference/cli.rst +++ b/docs/reference/cli.rst @@ -132,10 +132,6 @@ Optional command flags: option. If set, beets will just print a list of files that it would otherwise import. -* To get a more detailed output with the ``--pretend`` option, you can use the - ``--detailed`` flag. This way you can see which tracks beets think should - belong to an album. - .. _rarfile: https://pypi.python.org/pypi/rarfile/2.2 .. only:: html diff --git a/test/test_importer.py b/test/test_importer.py index 5516d3100..3b55bf8c5 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1573,9 +1573,7 @@ class ImportPretendTest(_common.TestCase, ImportHelper): os.makedirs(path) self.empty_path = path - def __run(self, import_paths, singletons=True, detailed=False): - config['import']['detailed'] = detailed - + def __run(self, import_paths, singletons=True): self._setup_import_session(singletons=singletons) self.importer.paths = import_paths @@ -1589,26 +1587,15 @@ class ImportPretendTest(_common.TestCase, ImportHelper): return logs - def test_import_pretend(self): + def test_import_singletons_pretend(self): logs = self.__run(self.import_paths) - self.assertEqual(logs, self.import_files) - - def test_import_pretend_empty(self): - logs = self.__run([self.empty_path]) - - self.assertEqual(logs, ['No files imported from {0}' - .format(displayable_path(self.empty_path))]) - - def test_import_singletons_pretend_detailed(self): - logs = self.__run(self.import_paths, detailed=True) - self.assertEqual(logs, [ 'Singleton: %s' % self.import_files[0], 'Singleton: %s' % self.import_paths[1]]) - def test_import_album_pretend_detailed(self): - logs = self.__run(self.import_paths, singletons=False, detailed=True) + def test_import_album_pretend(self): + logs = self.__run(self.import_paths, singletons=False) self.assertEqual(logs, [ 'Album %s' % displayable_path(self.import_paths[0]), @@ -1616,8 +1603,8 @@ class ImportPretendTest(_common.TestCase, ImportHelper): 'Album %s' % displayable_path(self.import_paths[1]), self.import_paths[1]]) - def test_import_pretend_empty_detailed(self): - logs = self.__run([self.empty_path], detailed=True) + def test_import_pretend_empty(self): + logs = self.__run([self.empty_path]) self.assertEqual(logs, ['No files imported from {0}' .format(displayable_path(self.empty_path))]) From bee0a5b9fe1d9c47e837ed408b5401bd9b9b945f Mon Sep 17 00:00:00 2001 From: Malte Ried Date: Wed, 31 Dec 2014 11:38:03 +0100 Subject: [PATCH 19/19] Album tracks are prepended by two spaces to indent them a bit. --- beets/importer.py | 2 +- test/test_importer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/beets/importer.py b/beets/importer.py index 6bf0e91a9..4a7bd997f 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -1305,7 +1305,7 @@ def log_files(session, task): elif task.items: log.info('Album {0}'.format(displayable_path(task.paths[0]))) for item in task.items: - log.info(displayable_path(item['path'])) + log.info(' {0}'.format(displayable_path(item['path']))) def group_albums(session): diff --git a/test/test_importer.py b/test/test_importer.py index 3b55bf8c5..89a65b74f 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1599,9 +1599,9 @@ class ImportPretendTest(_common.TestCase, ImportHelper): self.assertEqual(logs, [ 'Album %s' % displayable_path(self.import_paths[0]), - self.import_files[0], + ' %s' % self.import_files[0], 'Album %s' % displayable_path(self.import_paths[1]), - self.import_paths[1]]) + ' %s' % self.import_paths[1]]) def test_import_pretend_empty(self): logs = self.__run([self.empty_path])