From 0c612f408b7b328c467eb8f29a9fd9c7325892ee Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 00:55:14 +0000 Subject: [PATCH 01/15] Create plugin for "bare-ASCII" matching query Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 51 +++++++++++++++++++++++++ test/test_bareasc.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 beetsplug/bareasc.py create mode 100644 test/test_bareasc.py diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py new file mode 100644 index 000000000..9a23ac6d5 --- /dev/null +++ b/beetsplug/bareasc.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. +# Copyright 2021, Graham R. Cobb +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and ascociated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# This module is adapted from Fuzzy in accordance to the licence of +# that module + +"""Provides a bare-ASCII matching query. +""" + +from __future__ import division, absolute_import, print_function + +from beets.plugins import BeetsPlugin +from beets.dbcore.query import StringFieldQuery +from unidecode import unidecode + + +class BareascQuery(StringFieldQuery): + @classmethod + def string_match(cls, pattern, val): + print('In BareascQuery' + ' ' + pattern + ' ' + val) + # smartcase + if pattern.islower(): + val = val.lower() + pattern = unidecode(pattern) + val = unidecode(val) + return pattern == val + + +class BareascPlugin(BeetsPlugin): + def __init__(self): + super(BareascPlugin, self).__init__() + self.config.add({ + 'prefix': '#', + }) + + def queries(self): + prefix = self.config['prefix'].as_str() + return {prefix: BareascQuery} diff --git a/test/test_bareasc.py b/test/test_bareasc.py new file mode 100644 index 000000000..20aceac3f --- /dev/null +++ b/test/test_bareasc.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +"""Tests for the 'bareasc' plugin""" + +from __future__ import division, absolute_import, print_function + +import unittest + +from test.helper import TestHelper + +from beets import logging + + +class BareascPluginTest(unittest.TestCase, TestHelper): + + def setUp(self): + self.setup_beets() + self.log = logging.getLogger('beets.web') + self.config['bareasc']['prefix'] = u'#' + self.load_plugins('bareasc') + + # Add library elements. Note that self.lib.add overrides any "id=" + # and assigns the next free id number. + self.add_item(title=u'with accents', + album_id=2, + artist=u'dvořák') + self.add_item(title=u'without accents', + artist=u'dvorak') + self.add_item(title=u'with umlaut', + album_id=2, + artist=u'Brüggen') + self.add_item(title=u'without umlaut', + artist=u'Bruggen') + + def test_search_normal_noaccent(self): + items = self.lib.items('dvorak') + + self.assertEqual(len(items), 1) + self.assertEqual([items[0].title], [u'without accents']) + + def test_search_normal_accent(self): + items = self.lib.items('dvořák') + + self.assertEqual(len(items), 1) + self.assertEqual([items[0].title], [u'with accents']) + + def test_search_bareasc_noaccent(self): + items = self.lib.items('#dvorak') + + self.assertEqual(len(items), 2) + self.assertEqual( + {items[0].title, items[1].title}, + {u'without accents', u'with accents'} + ) + + def test_search_bareasc_accent(self): + items = self.lib.items('#dvořák') + + self.assertEqual(len(items), 2) + self.assertEqual( + {items[0].title, items[1].title}, + {u'without accents', u'with accents'} + ) + + def test_search_bareasc_noumlaut(self): + items = self.lib.items('#Bruggen') + + self.assertEqual(len(items), 2) + self.assertEqual( + {items[0].title, items[1].title}, + {u'without umlaut', u'with umlaut'} + ) + + def test_search_bareasc_umlaut(self): + items = self.lib.items('#Brüggen') + + self.assertEqual(len(items), 2) + self.assertEqual( + {items[0].title, items[1].title}, + {u'without umlaut', u'with umlaut'} + ) + + +def suite(): + return unittest.TestLoader().loadTestsFromName(__name__) + +if __name__ == '__main__': + unittest.main(defaultTest='suite') From 7dd1ee3fab34f6fafc7f9be83d50fbe905d38d65 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 01:09:50 +0000 Subject: [PATCH 02/15] Remove debugging print. Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index 9a23ac6d5..1419a5ba6 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -30,7 +30,6 @@ from unidecode import unidecode class BareascQuery(StringFieldQuery): @classmethod def string_match(cls, pattern, val): - print('In BareascQuery' + ' ' + pattern + ' ' + val) # smartcase if pattern.islower(): val = val.lower() From 80048e7153b709b6980871055395c5e4eb2678eb Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 09:11:17 +0000 Subject: [PATCH 03/15] Specify unicode, for python2 Signed-off-by: Graham R. Cobb --- test/test_bareasc.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_bareasc.py b/test/test_bareasc.py index 20aceac3f..6b2b9bee9 100644 --- a/test/test_bareasc.py +++ b/test/test_bareasc.py @@ -33,19 +33,19 @@ class BareascPluginTest(unittest.TestCase, TestHelper): artist=u'Bruggen') def test_search_normal_noaccent(self): - items = self.lib.items('dvorak') + items = self.lib.items(u'dvorak') self.assertEqual(len(items), 1) self.assertEqual([items[0].title], [u'without accents']) def test_search_normal_accent(self): - items = self.lib.items('dvořák') + items = self.lib.items(u'dvořák') self.assertEqual(len(items), 1) self.assertEqual([items[0].title], [u'with accents']) def test_search_bareasc_noaccent(self): - items = self.lib.items('#dvorak') + items = self.lib.items(u'#dvorak') self.assertEqual(len(items), 2) self.assertEqual( @@ -54,7 +54,7 @@ class BareascPluginTest(unittest.TestCase, TestHelper): ) def test_search_bareasc_accent(self): - items = self.lib.items('#dvořák') + items = self.lib.items(u'#dvořák') self.assertEqual(len(items), 2) self.assertEqual( @@ -63,7 +63,7 @@ class BareascPluginTest(unittest.TestCase, TestHelper): ) def test_search_bareasc_noumlaut(self): - items = self.lib.items('#Bruggen') + items = self.lib.items(u'#Bruggen') self.assertEqual(len(items), 2) self.assertEqual( @@ -72,7 +72,7 @@ class BareascPluginTest(unittest.TestCase, TestHelper): ) def test_search_bareasc_umlaut(self): - items = self.lib.items('#Brüggen') + items = self.lib.items(u'#Brüggen') self.assertEqual(len(items), 2) self.assertEqual( From dcbe622b7604aadf66d0a1f881ef1ee7a7651096 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 10:25:19 +0000 Subject: [PATCH 04/15] Oops, use `in` for matching! (It was late last night!) Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 8 ++++---- test/test_bareasc.py | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index 1419a5ba6..f0c43fdfd 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Philippe Mongeau. -# Copyright 2021, Graham R. Cobb +# Copyright 2021, Graham R. Cobb. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and ascociated documentation files (the @@ -17,8 +17,7 @@ # This module is adapted from Fuzzy in accordance to the licence of # that module -"""Provides a bare-ASCII matching query. -""" +"""Provides a bare-ASCII matching query.""" from __future__ import division, absolute_import, print_function @@ -28,6 +27,7 @@ from unidecode import unidecode class BareascQuery(StringFieldQuery): + """Matches items using bare ASCII, without accents etc.""" @classmethod def string_match(cls, pattern, val): # smartcase @@ -35,7 +35,7 @@ class BareascQuery(StringFieldQuery): val = val.lower() pattern = unidecode(pattern) val = unidecode(val) - return pattern == val + return pattern in val class BareascPlugin(BeetsPlugin): diff --git a/test/test_bareasc.py b/test/test_bareasc.py index 6b2b9bee9..4b004ff62 100644 --- a/test/test_bareasc.py +++ b/test/test_bareasc.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- +# This file is part of beets. +# Copyright 2021, Graham R. Cobb. -"""Tests for the 'bareasc' plugin""" +"""Tests for the 'bareasc' plugin.""" from __future__ import division, absolute_import, print_function @@ -23,9 +25,9 @@ class BareascPluginTest(unittest.TestCase, TestHelper): # and assigns the next free id number. self.add_item(title=u'with accents', album_id=2, - artist=u'dvořák') + artist=u'Antonín dvořák') self.add_item(title=u'without accents', - artist=u'dvorak') + artist=u'Antonín dvorak') self.add_item(title=u'with umlaut', album_id=2, artist=u'Brüggen') From fab7a27e9ff54aecb9c8565ac7b718cb965978a9 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 12:43:58 +0000 Subject: [PATCH 05/15] Add a couple more tests and make lint happy. Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 10 +++++++- test/test_bareasc.py | 54 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index f0c43fdfd..bdae32284 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -27,9 +27,14 @@ from unidecode import unidecode class BareascQuery(StringFieldQuery): - """Matches items using bare ASCII, without accents etc.""" + """Compare items using bare ASCII, without accents etc.""" @classmethod def string_match(cls, pattern, val): + """Convert both pattern and string to plain ASCII before matching. + + If pattern is all lower case, also convert string to lower case so + match is also case insensitive + """ # smartcase if pattern.islower(): val = val.lower() @@ -39,12 +44,15 @@ class BareascQuery(StringFieldQuery): class BareascPlugin(BeetsPlugin): + """Plugin to provide bare-ASCII option for beets matching.""" def __init__(self): + """Default prefix for selecting bare-ASCII matching is #.""" super(BareascPlugin, self).__init__() self.config.add({ 'prefix': '#', }) def queries(self): + """Reguster bare-ASCII matching.""" prefix = self.config['prefix'].as_str() return {prefix: BareascQuery} diff --git a/test/test_bareasc.py b/test/test_bareasc.py index 4b004ff62..d49ea4448 100644 --- a/test/test_bareasc.py +++ b/test/test_bareasc.py @@ -14,8 +14,10 @@ from beets import logging class BareascPluginTest(unittest.TestCase, TestHelper): + """Test bare ASCII query matching.""" def setUp(self): + """Set up test environment for bare ASCII query matching.""" self.setup_beets() self.log = logging.getLogger('beets.web') self.config['bareasc']['prefix'] = u'#' @@ -31,22 +33,36 @@ class BareascPluginTest(unittest.TestCase, TestHelper): self.add_item(title=u'with umlaut', album_id=2, artist=u'Brüggen') - self.add_item(title=u'without umlaut', + self.add_item(title=u'without umlaut or e', artist=u'Bruggen') + self.add_item(title=u'without umlaut with e', + artist=u'Brueggen') def test_search_normal_noaccent(self): + """Normal search, no accents, not using bare-ASCII match. + + Finds just the unaccented entry. + """ items = self.lib.items(u'dvorak') self.assertEqual(len(items), 1) self.assertEqual([items[0].title], [u'without accents']) def test_search_normal_accent(self): + """Normal search, with accents, not using bare-ASCII match. + + Finds just the accented entry. + """ items = self.lib.items(u'dvořák') self.assertEqual(len(items), 1) self.assertEqual([items[0].title], [u'with accents']) def test_search_bareasc_noaccent(self): + """Bare-ASCII search, no accents. + + Finds both entries. + """ items = self.lib.items(u'#dvorak') self.assertEqual(len(items), 2) @@ -56,6 +72,10 @@ class BareascPluginTest(unittest.TestCase, TestHelper): ) def test_search_bareasc_accent(self): + """Bare-ASCII search, with accents. + + Finds both entries. + """ items = self.lib.items(u'#dvořák') self.assertEqual(len(items), 2) @@ -64,26 +84,54 @@ class BareascPluginTest(unittest.TestCase, TestHelper): {u'without accents', u'with accents'} ) + def test_search_bareasc_wrong_accent(self): + """Bare-ASCII search, with incorrect accent. + + Finds both entries. + """ + items = self.lib.items(u'#dvořäk') + + self.assertEqual(len(items), 2) + self.assertEqual( + {items[0].title, items[1].title}, + {u'without accents', u'with accents'} + ) + def test_search_bareasc_noumlaut(self): + """Bare-ASCII search, with no umlaut. + + Finds entry with 'u' not 'ue', although German speaker would + normally replace ü with ue. + + This is expected behaviour for this simple plugin. + """ items = self.lib.items(u'#Bruggen') self.assertEqual(len(items), 2) self.assertEqual( {items[0].title, items[1].title}, - {u'without umlaut', u'with umlaut'} + {u'without umlaut or e', u'with umlaut'} ) def test_search_bareasc_umlaut(self): + """Bare-ASCII search, with umlaut. + + Finds entry with 'u' not 'ue', although German speaker would + normally replace ü with ue. + + This is expected behaviour for this simple plugin. + """ items = self.lib.items(u'#Brüggen') self.assertEqual(len(items), 2) self.assertEqual( {items[0].title, items[1].title}, - {u'without umlaut', u'with umlaut'} + {u'without umlaut or e', u'with umlaut'} ) def suite(): + """loader.""" return unittest.TestLoader().loadTestsFromName(__name__) if __name__ == '__main__': From 341a0a0adf0b86769fa111aa32fdb860d8ba7769 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 15:34:58 +0000 Subject: [PATCH 06/15] Added documentation and changelog for bareasc. Signed-off-by: Graham R. Cobb --- docs/changelog.rst | 5 +++++ docs/plugins/bareasc.rst | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 docs/plugins/bareasc.rst diff --git a/docs/changelog.rst b/docs/changelog.rst index 7338282f5..07fa573f9 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -38,6 +38,11 @@ Major new features: `Deezer`_ database. Thanks to :user:`rhlahuja`. :bug:`3355` +* A new :doc:`/plugins/bareasc` provides a new query type: `bare ASCII` + which ignores accented characters, treating them as though they + were the base ASCII character. To perform `bare ASCII` searches, use + the ``#`` prefix with :ref:`list-cmd` or other commands. + :bug:`3882` Other new things: diff --git a/docs/plugins/bareasc.rst b/docs/plugins/bareasc.rst new file mode 100644 index 000000000..114b5d8f9 --- /dev/null +++ b/docs/plugins/bareasc.rst @@ -0,0 +1,37 @@ +Bare-ASCII Search Plugin +======================== + +The ``bareasc`` plugin provides a prefixed query that searches your library using +simple ASCII character matching, with accented characters folded to their base +ASCII character. This can be useful if you want to find a track with accented +characters in the title or artist, particularly if you are not confident +you have the accents correct. It is also not unknown for the accents +to not be correct in the database entry or wrong in the CD information. + +First, enable the plugin named ``bareasc`` (see :ref:`using-plugins`). +You'll then be able to use the ``#`` prefix to use bare-ASCII matching:: + + $ beet ls '#dvorak' + István Kertész - REQUIEM - Dvořàk: Requiem, op.89 - Confutatis maledictis + +Notes +----- + +If the query string is all in lower case, the comparison ignores case as well as +accents. + +The default ``bareasc`` prefix (``#``) is used as a comment character in some shells +so may need to be protected (for example in quotes) when typed into the command line. + +The bare ASCII transformation is quite simple. It may not work perfectly for all +languages and does not handle transformations which change the number of letters. +For example, German u-umlaut ``ü`` is transformed into ASCII ``u``, not into ``ue``. + +Configuration +------------- + +To configure the plugin, make a ``bareasc:`` section in your configuration +file. The only available option is: + +- **prefix**: The character used to designate bare-ASCII queries. + Default: ``#``, which may need to be escaped in some shells. From f4fccfa05c960d91472d01ceb58a470794707c80 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 15:53:14 +0000 Subject: [PATCH 07/15] Add documentation for bareasc to the plugins index. Signed-off-by: Graham R. Cobb --- docs/plugins/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index ae14b8166..14dd5137b 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -63,6 +63,7 @@ following to your configuration:: acousticbrainz aura badfiles + bareasc beatport bpd bpm @@ -218,6 +219,7 @@ Interoperability Miscellaneous ------------- +* :doc:`bareasc`: Search albums and tracks with bare ASCII string matching. * :doc:`bpd`: A music player for your beets library that emulates `MPD`_ and is compatible with `MPD clients`_. * :doc:`convert`: Transcode music and embed album art while exporting to From 06b6b72e0eff1171c9eea1095af027a38aa0a96c Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Mon, 15 Mar 2021 22:52:14 +0000 Subject: [PATCH 08/15] Add credit to Unidecode library into bareasc docs. Signed-off-by: Graham R. Cobb --- docs/plugins/bareasc.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/plugins/bareasc.rst b/docs/plugins/bareasc.rst index 114b5d8f9..63aca2317 100644 --- a/docs/plugins/bareasc.rst +++ b/docs/plugins/bareasc.rst @@ -35,3 +35,10 @@ file. The only available option is: - **prefix**: The character used to designate bare-ASCII queries. Default: ``#``, which may need to be escaped in some shells. + +Credits +------- + +The hard work in this plugin is done in Sean Burke's Unidecode library. +Thanks are due to Sean and to all the people who created the Python +version and the beets extensible query architecture. From d1ec7b4b70f91ed5fa932db35716057a589911fa Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Tue, 16 Mar 2021 11:57:52 +0000 Subject: [PATCH 09/15] Add ``bareasc`` command to display entries with the unidecode transformation applied. Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 27 ++++++++++++++++++++++++++- docs/plugins/bareasc.rst | 30 +++++++++++++++++++++++++++--- test/test_bareasc.py | 13 ++++++++++--- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index bdae32284..138da8f82 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -21,6 +21,8 @@ from __future__ import division, absolute_import, print_function +from beets import ui +from beets.ui import print_, decargs from beets.plugins import BeetsPlugin from beets.dbcore.query import StringFieldQuery from unidecode import unidecode @@ -53,6 +55,29 @@ class BareascPlugin(BeetsPlugin): }) def queries(self): - """Reguster bare-ASCII matching.""" + """Register bare-ASCII matching.""" prefix = self.config['prefix'].as_str() return {prefix: BareascQuery} + + def commands(self): + """Add bareasc command as unidecode version of 'list'.""" + cmd = ui.Subcommand('bareasc', + help='unidecode version of beet list command') + cmd.parser.usage += u"\n" \ + u'Example: %prog -f \'$album: $title\' artist:beatles' + cmd.parser.add_all_common_options() + cmd.func = self.unidecode_list + return [cmd] + + def unidecode_list(self, lib, opts, args): + """Emulate normal 'list' command but with unidecode output.""" + query = decargs(args) + album = opts.album + fmt = u'' + # Copied from commands.py - list_items + if album: + for album in lib.albums(query): + print_(unidecode(format(album, fmt))) + else: + for item in lib.items(query): + print_(unidecode(format(item, fmt))) diff --git a/docs/plugins/bareasc.rst b/docs/plugins/bareasc.rst index 63aca2317..765c9a81a 100644 --- a/docs/plugins/bareasc.rst +++ b/docs/plugins/bareasc.rst @@ -14,6 +14,23 @@ You'll then be able to use the ``#`` prefix to use bare-ASCII matching:: $ beet ls '#dvorak' István Kertész - REQUIEM - Dvořàk: Requiem, op.89 - Confutatis maledictis +Command +------- + +In addition to the query prefix, the plugin provides a utility ``bareasc`` command. +This command is **exactly** the same as the ``beet list`` command except that +the output is passed through the bare-ASCII transformation before being printed. +This allows you to easily check what the library data looks like in bare ASCII, +which can be useful if you are trying to work out why a query is not matching. + +Using the same example track as above:: + + $ beet bareasc 'Dvořàk' + Istvan Kertesz - REQUIEM - Dvorak: Requiem, op.89 - Confutatis maledictis + +Note: the ``bareasc`` command does NOT automatically use bare-ASCII queries. +If you want a bare-ASCII query you still need to specify the ``#`` prefix. + Notes ----- @@ -23,9 +40,16 @@ accents. The default ``bareasc`` prefix (``#``) is used as a comment character in some shells so may need to be protected (for example in quotes) when typed into the command line. -The bare ASCII transformation is quite simple. It may not work perfectly for all -languages and does not handle transformations which change the number of letters. -For example, German u-umlaut ``ü`` is transformed into ASCII ``u``, not into ``ue``. +The bare ASCII transformation is quite simple. It may not give the expected output +for all languages. For example, German u-umlaut ``ü`` is transformed into ASCII ``u``, +not into ``ue``. + +The bare ASCII transformation also changes Unicode punctuation like double quotes, +apostrophes and even some hyphens. It is often best to leave out punctuation +in the queries. Note that the punctuation changes are often not even visible +with normal terminal fonts. You can always use the ``bareasc`` command to print the +transformed entries and use a command like ``diff`` to compare with the output +from the ``list`` command. Configuration ------------- diff --git a/test/test_bareasc.py b/test/test_bareasc.py index d49ea4448..abb42a730 100644 --- a/test/test_bareasc.py +++ b/test/test_bareasc.py @@ -8,7 +8,7 @@ from __future__ import division, absolute_import, print_function import unittest -from test.helper import TestHelper +from test.helper import capture_stdout, TestHelper from beets import logging @@ -27,9 +27,9 @@ class BareascPluginTest(unittest.TestCase, TestHelper): # and assigns the next free id number. self.add_item(title=u'with accents', album_id=2, - artist=u'Antonín dvořák') + artist=u'Antonín Dvořák') self.add_item(title=u'without accents', - artist=u'Antonín dvorak') + artist=u'Antonín Dvorak') self.add_item(title=u'with umlaut', album_id=2, artist=u'Brüggen') @@ -129,6 +129,13 @@ class BareascPluginTest(unittest.TestCase, TestHelper): {u'without umlaut or e', u'with umlaut'} ) + def test_bareasc_list_output(self): + """Bare-ASCII version of list command - check output.""" + with capture_stdout() as output: + self.run_command('bareasc', 'with accents') + + self.assertIn('Antonin Dvorak', output.getvalue()) + def suite(): """loader.""" From 0078b02085047eb8e2d95364212f6a4c35d69a82 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Tue, 16 Mar 2021 14:50:51 +0000 Subject: [PATCH 10/15] Python2 support for bareasc command Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index 138da8f82..43d83005a 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -26,6 +26,7 @@ from beets.ui import print_, decargs from beets.plugins import BeetsPlugin from beets.dbcore.query import StringFieldQuery from unidecode import unidecode +import six class BareascQuery(StringFieldQuery): @@ -80,4 +81,4 @@ class BareascPlugin(BeetsPlugin): print_(unidecode(format(album, fmt))) else: for item in lib.items(query): - print_(unidecode(format(item, fmt))) + print_(six.ensure_str(unidecode(format(item, fmt)))) From 2df3765521c334de16b5a9203d0d409878bcc8be Mon Sep 17 00:00:00 2001 From: Graham Cobb Date: Tue, 16 Mar 2021 15:29:35 +0000 Subject: [PATCH 11/15] Tidy doc as suggested by @sampsyo Co-authored-by: Adrian Sampson --- docs/plugins/bareasc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugins/bareasc.rst b/docs/plugins/bareasc.rst index 765c9a81a..9be102e27 100644 --- a/docs/plugins/bareasc.rst +++ b/docs/plugins/bareasc.rst @@ -28,7 +28,7 @@ Using the same example track as above:: $ beet bareasc 'Dvořàk' Istvan Kertesz - REQUIEM - Dvorak: Requiem, op.89 - Confutatis maledictis -Note: the ``bareasc`` command does NOT automatically use bare-ASCII queries. +Note: the ``bareasc`` command does *not* automatically use bare-ASCII queries. If you want a bare-ASCII query you still need to specify the ``#`` prefix. Notes From 4b9c9d0a5fd551fdcd52737cd88e2aa09b89665a Mon Sep 17 00:00:00 2001 From: Graham Cobb Date: Tue, 16 Mar 2021 15:30:43 +0000 Subject: [PATCH 12/15] doc improvement as suggested by @sampsyo Co-authored-by: Adrian Sampson --- docs/plugins/bareasc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugins/bareasc.rst b/docs/plugins/bareasc.rst index 9be102e27..a22063805 100644 --- a/docs/plugins/bareasc.rst +++ b/docs/plugins/bareasc.rst @@ -40,7 +40,7 @@ accents. The default ``bareasc`` prefix (``#``) is used as a comment character in some shells so may need to be protected (for example in quotes) when typed into the command line. -The bare ASCII transformation is quite simple. It may not give the expected output +The bare ASCII transliteration is quite simple. It may not give the expected output for all languages. For example, German u-umlaut ``ü`` is transformed into ASCII ``u``, not into ``ue``. From cad2c055c5e1e595b76e013598fe454caa957bf2 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Tue, 16 Mar 2021 16:29:57 +0000 Subject: [PATCH 13/15] Make unicode handling explicit, to support python 2 and 3. Add link to Unidecode library in docs. Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 6 ++++-- docs/plugins/bareasc.rst | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index 43d83005a..d17cc4ac1 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -78,7 +78,9 @@ class BareascPlugin(BeetsPlugin): # Copied from commands.py - list_items if album: for album in lib.albums(query): - print_(unidecode(format(album, fmt))) + bare = unidecode(six.ensure_text(format(album, fmt))) + print_(six.ensure_str(bare)) else: for item in lib.items(query): - print_(six.ensure_str(unidecode(format(item, fmt)))) + bare = unidecode(six.ensure_text(format(item, fmt))) + print_(six.ensure_str(bare)) diff --git a/docs/plugins/bareasc.rst b/docs/plugins/bareasc.rst index a22063805..0c8d6636c 100644 --- a/docs/plugins/bareasc.rst +++ b/docs/plugins/bareasc.rst @@ -63,6 +63,7 @@ file. The only available option is: Credits ------- -The hard work in this plugin is done in Sean Burke's Unidecode library. +The hard work in this plugin is done in Sean Burke's +`Unidecode `__ library. Thanks are due to Sean and to all the people who created the Python version and the beets extensible query architecture. From b0110fa224e2af73125edb88a8b6cce0d6da5b12 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Tue, 16 Mar 2021 16:40:43 +0000 Subject: [PATCH 14/15] Try again to fix unidecode_list for python2 support Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index d17cc4ac1..2bdad98fc 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -79,8 +79,8 @@ class BareascPlugin(BeetsPlugin): if album: for album in lib.albums(query): bare = unidecode(six.ensure_text(format(album, fmt))) - print_(six.ensure_str(bare)) + print_(six.ensure_text(bare)) else: for item in lib.items(query): bare = unidecode(six.ensure_text(format(item, fmt))) - print_(six.ensure_str(bare)) + print_(six.ensure_text(bare)) From c3485b5b0415fb4e4bdf64b1f0bf6cf0840c4567 Mon Sep 17 00:00:00 2001 From: "Graham R. Cobb" Date: Tue, 16 Mar 2021 22:36:42 +0000 Subject: [PATCH 15/15] Remove unnecessary call to format. Signed-off-by: Graham R. Cobb --- beetsplug/bareasc.py | 5 ++--- test/test_bareasc.py | 9 +++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/beetsplug/bareasc.py b/beetsplug/bareasc.py index 2bdad98fc..4d574c756 100644 --- a/beetsplug/bareasc.py +++ b/beetsplug/bareasc.py @@ -74,13 +74,12 @@ class BareascPlugin(BeetsPlugin): """Emulate normal 'list' command but with unidecode output.""" query = decargs(args) album = opts.album - fmt = u'' # Copied from commands.py - list_items if album: for album in lib.albums(query): - bare = unidecode(six.ensure_text(format(album, fmt))) + bare = unidecode(six.ensure_text(str(album))) print_(six.ensure_text(bare)) else: for item in lib.items(query): - bare = unidecode(six.ensure_text(format(item, fmt))) + bare = unidecode(six.ensure_text(str(item))) print_(six.ensure_text(bare)) diff --git a/test/test_bareasc.py b/test/test_bareasc.py index abb42a730..1ce4e6176 100644 --- a/test/test_bareasc.py +++ b/test/test_bareasc.py @@ -136,6 +136,15 @@ class BareascPluginTest(unittest.TestCase, TestHelper): self.assertIn('Antonin Dvorak', output.getvalue()) + def test_bareasc_format_output(self): + """Bare-ASCII version of list -f command - check output.""" + with capture_stdout() as output: + self.run_command('bareasc', 'with accents', + '-f', '$artist:: $title') + + self.assertEqual('Antonin Dvorak:: with accents\n', + output.getvalue()) + def suite(): """loader."""