mirror of
https://github.com/beetbox/beets.git
synced 2026-02-17 04:43:40 +01:00
Merge pull request #3883 from GrahamCobb/bareasc
Experimental "bare-ASCII" matching query
This commit is contained in:
commit
39cb013bd3
5 changed files with 315 additions and 0 deletions
85
beetsplug/bareasc.py
Normal file
85
beetsplug/bareasc.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This file is part of beets.
|
||||
# Copyright 2016, Philippe Mongeau.
|
||||
# Copyright 2021, Graham R. Cobb.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# This module is adapted from Fuzzy in accordance with the licence of
|
||||
# that module
|
||||
|
||||
"""Provides a bare-ASCII matching query."""
|
||||
|
||||
from __future__ import division, absolute_import, print_function
|
||||
|
||||
from beets import ui
|
||||
from beets.ui import print_, decargs
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets.dbcore.query import StringFieldQuery
|
||||
from unidecode import unidecode
|
||||
import six
|
||||
|
||||
|
||||
class BareascQuery(StringFieldQuery):
    """Compare items using bare ASCII, without accents etc."""

    @classmethod
    def string_match(cls, pattern, val):
        """Report whether `pattern` occurs in `val` once both are bare ASCII.

        Both strings are transliterated with `unidecode` and the result is
        a plain substring test on the transliterated text.

        Smartcase: if `pattern` is all lower case the comparison is also
        case insensitive; otherwise case is significant.
        """
        pattern_bare = unidecode(pattern)
        val_bare = unidecode(val)
        # smartcase, decided on the pattern exactly as it was given
        if pattern.islower():
            # Fold case only *after* transliterating: unidecode can emit
            # upper-case ASCII for text that str.lower() leaves untouched
            # (e.g. CJK characters become capitalised romanisations), so
            # lowering first would leave capitals behind and the
            # case-insensitive match would be missed.
            pattern_bare = pattern_bare.lower()
            val_bare = val_bare.lower()
        return pattern_bare in val_bare
|
||||
|
||||
|
||||
class BareascPlugin(BeetsPlugin):
    """Beets plugin offering bare-ASCII query matching and listing."""

    def __init__(self):
        """Initialise the plugin; the query prefix defaults to ``#``."""
        super(BareascPlugin, self).__init__()
        defaults = {'prefix': '#'}
        self.config.add(defaults)

    def queries(self):
        """Map the configured prefix to the bare-ASCII query class."""
        return {self.config['prefix'].as_str(): BareascQuery}

    def commands(self):
        """Build the ``bareasc`` subcommand, a unidecode flavour of 'list'."""
        bareasc_cmd = ui.Subcommand(
            'bareasc', help='unidecode version of beet list command'
        )
        bareasc_cmd.parser.usage += (
            u"\n"
            u'Example: %prog -f \'$album: $title\' artist:beatles'
        )
        bareasc_cmd.parser.add_all_common_options()
        bareasc_cmd.func = self.unidecode_list
        return [bareasc_cmd]

    def unidecode_list(self, lib, opts, args):
        """Print query results like 'list' does, passed through unidecode."""
        query = decargs(args)
        # Album/item split follows list_items in commands.py.
        if opts.album:
            matches = lib.albums(query)
        else:
            matches = lib.items(query)
        for obj in matches:
            text = six.ensure_text(str(obj))
            print_(six.ensure_text(unidecode(text)))
|
||||
|
|
@ -38,6 +38,11 @@ Major new features:
|
|||
`Deezer`_ database.
|
||||
Thanks to :user:`rhlahuja`.
|
||||
:bug:`3355`
|
||||
* A new :doc:`/plugins/bareasc` provides a new query type: `bare ASCII`
|
||||
which ignores accents, treating accented characters as though they
|
||||
were the base ASCII character. To perform `bare ASCII` searches, use
|
||||
the ``#`` prefix with :ref:`list-cmd` or other commands.
|
||||
:bug:`3882`
|
||||
|
||||
Other new things:
|
||||
|
||||
|
|
|
|||
69
docs/plugins/bareasc.rst
Normal file
69
docs/plugins/bareasc.rst
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
Bare-ASCII Search Plugin
|
||||
========================
|
||||
|
||||
The ``bareasc`` plugin provides a prefixed query that searches your library using
|
||||
simple ASCII character matching, with accented characters folded to their base
|
||||
ASCII character. This can be useful if you want to find a track with accented
|
||||
characters in the title or artist, particularly if you are not confident
|
||||
you have the accents correct. It is also not unknown for the accents
|
||||
to be incorrect in the database entry or wrong in the CD information.
|
||||
|
||||
First, enable the plugin named ``bareasc`` (see :ref:`using-plugins`).
|
||||
You'll then be able to use the ``#`` prefix to use bare-ASCII matching::
|
||||
|
||||
$ beet ls '#dvorak'
|
||||
István Kertész - REQUIEM - Dvořàk: Requiem, op.89 - Confutatis maledictis
|
||||
|
||||
Command
|
||||
-------
|
||||
|
||||
In addition to the query prefix, the plugin provides a utility ``bareasc`` command.
|
||||
This command is **exactly** the same as the ``beet list`` command except that
|
||||
the output is passed through the bare-ASCII transformation before being printed.
|
||||
This allows you to easily check what the library data looks like in bare ASCII,
|
||||
which can be useful if you are trying to work out why a query is not matching.
|
||||
|
||||
Using the same example track as above::
|
||||
|
||||
$ beet bareasc 'Dvořàk'
|
||||
Istvan Kertesz - REQUIEM - Dvorak: Requiem, op.89 - Confutatis maledictis
|
||||
|
||||
Note: the ``bareasc`` command does *not* automatically use bare-ASCII queries.
|
||||
If you want a bare-ASCII query you still need to specify the ``#`` prefix.
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
If the query string is all in lower case, the comparison ignores case as well as
|
||||
accents.
|
||||
|
||||
The default ``bareasc`` prefix (``#``) is used as a comment character in some shells
|
||||
so may need to be protected (for example in quotes) when typed into the command line.
|
||||
|
||||
The bare ASCII transliteration is quite simple. It may not give the expected output
|
||||
for all languages. For example, German u-umlaut ``ü`` is transformed into ASCII ``u``,
|
||||
not into ``ue``.
|
||||
|
||||
The bare ASCII transformation also changes Unicode punctuation like double quotes,
|
||||
apostrophes and even some hyphens. It is often best to leave out punctuation
|
||||
in the queries. Note that the punctuation changes are often not even visible
|
||||
with normal terminal fonts. You can always use the ``bareasc`` command to print the
|
||||
transformed entries and use a command like ``diff`` to compare with the output
|
||||
from the ``list`` command.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
To configure the plugin, make a ``bareasc:`` section in your configuration
|
||||
file. The only available option is:
|
||||
|
||||
- **prefix**: The character used to designate bare-ASCII queries.
|
||||
Default: ``#``, which may need to be escaped in some shells.
|
||||
|
||||
Credits
|
||||
-------
|
||||
|
||||
The hard work in this plugin is done in Sean Burke's
|
||||
`Unidecode <https://pypi.org/project/Unidecode/>`__ library.
|
||||
Thanks are due to Sean and to all the people who created the Python
|
||||
version and the beets extensible query architecture.
|
||||
|
|
@ -63,6 +63,7 @@ following to your configuration::
|
|||
acousticbrainz
|
||||
aura
|
||||
badfiles
|
||||
bareasc
|
||||
beatport
|
||||
bpd
|
||||
bpm
|
||||
|
|
@ -218,6 +219,7 @@ Interoperability
|
|||
Miscellaneous
|
||||
-------------
|
||||
|
||||
* :doc:`bareasc`: Search albums and tracks with bare ASCII string matching.
|
||||
* :doc:`bpd`: A music player for your beets library that emulates `MPD`_ and is
|
||||
compatible with `MPD clients`_.
|
||||
* :doc:`convert`: Transcode music and embed album art while exporting to
|
||||
|
|
|
|||
154
test/test_bareasc.py
Normal file
154
test/test_bareasc.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This file is part of beets.
|
||||
# Copyright 2021, Graham R. Cobb.
|
||||
|
||||
"""Tests for the 'bareasc' plugin."""
|
||||
|
||||
from __future__ import division, absolute_import, print_function
|
||||
|
||||
import unittest
|
||||
|
||||
from test.helper import capture_stdout, TestHelper
|
||||
|
||||
from beets import logging
|
||||
|
||||
|
||||
class BareascPluginTest(unittest.TestCase, TestHelper):
    """Test bare ASCII query matching."""

    def setUp(self):
        """Set up test environment for bare ASCII query matching."""
        self.setup_beets()
        self.log = logging.getLogger('beets.bareasc')
        self.config['bareasc']['prefix'] = u'#'
        self.load_plugins('bareasc')

        # Add library elements. Note that self.lib.add overrides any "id=<n>"
        # and assigns the next free id number.
        self.add_item(title=u'with accents',
                      album_id=2,
                      artist=u'Antonín Dvořák')
        self.add_item(title=u'without accents',
                      artist=u'Antonín Dvorak')
        self.add_item(title=u'with umlaut',
                      album_id=2,
                      artist=u'Brüggen')
        self.add_item(title=u'without umlaut or e',
                      artist=u'Bruggen')
        self.add_item(title=u'without umlaut with e',
                      artist=u'Brueggen')

    def tearDown(self):
        """Undo setUp so that no state leaks into other tests.

        Plugins are unloaded before the beets environment is torn down,
        mirroring setUp in reverse order.
        """
        self.unload_plugins()
        self.teardown_beets()

    def test_search_normal_noaccent(self):
        """Normal search, no accents, not using bare-ASCII match.

        Finds just the unaccented entry.
        """
        items = self.lib.items(u'dvorak')

        self.assertEqual(len(items), 1)
        self.assertEqual([items[0].title], [u'without accents'])

    def test_search_normal_accent(self):
        """Normal search, with accents, not using bare-ASCII match.

        Finds just the accented entry.
        """
        items = self.lib.items(u'dvořák')

        self.assertEqual(len(items), 1)
        self.assertEqual([items[0].title], [u'with accents'])

    def test_search_bareasc_noaccent(self):
        """Bare-ASCII search, no accents.

        Finds both entries.
        """
        items = self.lib.items(u'#dvorak')

        self.assertEqual(len(items), 2)
        self.assertEqual(
            {items[0].title, items[1].title},
            {u'without accents', u'with accents'}
        )

    def test_search_bareasc_accent(self):
        """Bare-ASCII search, with accents.

        Finds both entries.
        """
        items = self.lib.items(u'#dvořák')

        self.assertEqual(len(items), 2)
        self.assertEqual(
            {items[0].title, items[1].title},
            {u'without accents', u'with accents'}
        )

    def test_search_bareasc_wrong_accent(self):
        """Bare-ASCII search, with incorrect accent.

        Finds both entries.
        """
        items = self.lib.items(u'#dvořäk')

        self.assertEqual(len(items), 2)
        self.assertEqual(
            {items[0].title, items[1].title},
            {u'without accents', u'with accents'}
        )

    def test_search_bareasc_noumlaut(self):
        """Bare-ASCII search, with no umlaut.

        Finds entry with 'u' not 'ue', although German speaker would
        normally replace ü with ue.

        This is expected behaviour for this simple plugin.
        """
        items = self.lib.items(u'#Bruggen')

        self.assertEqual(len(items), 2)
        self.assertEqual(
            {items[0].title, items[1].title},
            {u'without umlaut or e', u'with umlaut'}
        )

    def test_search_bareasc_umlaut(self):
        """Bare-ASCII search, with umlaut.

        Finds entry with 'u' not 'ue', although German speaker would
        normally replace ü with ue.

        This is expected behaviour for this simple plugin.
        """
        items = self.lib.items(u'#Brüggen')

        self.assertEqual(len(items), 2)
        self.assertEqual(
            {items[0].title, items[1].title},
            {u'without umlaut or e', u'with umlaut'}
        )

    def test_bareasc_list_output(self):
        """Bare-ASCII version of list command - check output."""
        with capture_stdout() as output:
            self.run_command('bareasc', 'with accents')

        self.assertIn('Antonin Dvorak', output.getvalue())

    def test_bareasc_format_output(self):
        """Bare-ASCII version of list -f command - check output."""
        with capture_stdout() as output:
            self.run_command('bareasc', 'with accents',
                             '-f', '$artist:: $title')

        self.assertEqual('Antonin Dvorak:: with accents\n',
                         output.getvalue())
|
||||
|
||||
|
||||
def suite():
    """Collect the tests defined in this module into one suite."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)


if __name__ == '__main__':
    # Direct execution runs whatever suite() assembles.
    unittest.main(defaultTest='suite')
|
||||
Loading…
Reference in a new issue