mirror of
https://github.com/beetbox/beets.git
synced 2025-12-24 01:25:47 +01:00
cleanup and docs for regular expression queries
This commit is contained in:
parent
a6e6da245a
commit
104aec3c2f
4 changed files with 85 additions and 51 deletions
|
|
@ -425,7 +425,7 @@ class RegexpQuery(FieldQuery):
|
|||
|
||||
def match(self, item):
|
||||
value = getattr(item, self.field) or ''
|
||||
return self.regexp.match(value) is not None
|
||||
return self.regexp.search(value) is not None
|
||||
|
||||
class BooleanQuery(MatchQuery):
|
||||
"""Matches a boolean field. Pattern should either be a boolean or a
|
||||
|
|
@ -476,24 +476,28 @@ class CollectionQuery(Query):
|
|||
subvals += subq_subvals
|
||||
clause = (' ' + joiner + ' ').join(clause_parts)
|
||||
return clause, subvals
|
||||
|
||||
# regular expression for _parse_query_part, below
|
||||
_pq_regex = re.compile(# non-grouping optional segment for the keyword
|
||||
r'(?:'
|
||||
r'(\S+?)' # the keyword
|
||||
r'(?<!\\):' # unescaped :
|
||||
r')?'
|
||||
r'((?<!\\):?)' # unescaped : for regexps
|
||||
r'(.+)', # the term itself
|
||||
re.I) # case-insensitive
|
||||
|
||||
# Regular expression for _parse_query_part, below.
|
||||
_pq_regex = re.compile(
|
||||
# Non-capturing optional segment for the keyword.
|
||||
r'(?:'
|
||||
r'(\S+?)' # The field key.
|
||||
r'(?<!\\):' # Unescaped :
|
||||
r')?'
|
||||
|
||||
r'((?<!\\):?)' # Unescaped : indicating a regex.
|
||||
r'(.+)', # The term itself.
|
||||
|
||||
re.I # Case-insensitive.
|
||||
)
|
||||
@classmethod
|
||||
def _parse_query_part(cls, part):
|
||||
"""Takes a query in the form of a key/value pair separated by a
|
||||
colon. An additional colon before the value indicates that the
|
||||
value is a regular expression.
|
||||
Returns tuple (key, term, is_regexp) where key is None if
|
||||
the search term has no key and is_regexp indicates whether term
|
||||
is a regular expression or not.
|
||||
value is a regular expression. Returns tuple (key, term,
|
||||
is_regexp) where key is None if the search term has no key and
|
||||
is_regexp indicates whether term is a regular expression or an
|
||||
ordinary substring match.
|
||||
|
||||
For instance,
|
||||
parse_query('stapler') == (None, 'stapler', false)
|
||||
|
|
@ -507,10 +511,15 @@ class CollectionQuery(Query):
|
|||
part = part.strip()
|
||||
match = cls._pq_regex.match(part)
|
||||
if match:
|
||||
return match.group(1), match.group(3).replace(r'\:', ':'), match.group(2)==':'
|
||||
return (
|
||||
match.group(1), # Key.
|
||||
match.group(3).replace(r'\:', ':'), # Term.
|
||||
match.group(2) == ':', # Regular expression.
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_strings(cls, query_parts, default_fields=None, all_keys=ITEM_KEYS):
|
||||
def from_strings(cls, query_parts, default_fields=None,
|
||||
all_keys=ITEM_KEYS):
|
||||
"""Creates a query from a list of strings in the format used by
|
||||
_parse_query_part. If default_fields are specified, they are the
|
||||
fields to be searched by unqualified search terms. Otherwise,
|
||||
|
|
@ -522,30 +531,40 @@ class CollectionQuery(Query):
|
|||
if not res:
|
||||
continue
|
||||
key, pattern, is_regexp = res
|
||||
if key is None: # No key specified.
|
||||
|
||||
# No key specified.
|
||||
if key is None:
|
||||
if os.sep in pattern and 'path' in all_keys:
|
||||
# This looks like a path.
|
||||
subqueries.append(PathQuery(pattern))
|
||||
else:
|
||||
# Match any field.
|
||||
if is_regexp:
|
||||
subqueries.append(
|
||||
AnyRegexpQuery(pattern, default_fields))
|
||||
subq = AnyRegexpQuery(pattern, default_fields)
|
||||
else:
|
||||
subqueries.append(
|
||||
AnySubstringQuery(pattern, default_fields))
|
||||
elif key.lower() == 'comp': # a boolean field
|
||||
subq = AnySubstringQuery(pattern, default_fields)
|
||||
subqueries.append(subq)
|
||||
|
||||
# A boolean field.
|
||||
elif key.lower() == 'comp':
|
||||
subqueries.append(BooleanQuery(key.lower(), pattern))
|
||||
|
||||
# Path field.
|
||||
elif key.lower() == 'path' and 'path' in all_keys:
|
||||
subqueries.append(PathQuery(pattern))
|
||||
elif key.lower() in all_keys: # ignore unrecognized keys
|
||||
|
||||
# Other (recognized) field.
|
||||
elif key.lower() in all_keys:
|
||||
if is_regexp:
|
||||
subqueries.append(RegexpQuery(key.lower(), pattern))
|
||||
else:
|
||||
subqueries.append(SubstringQuery(key.lower(), pattern))
|
||||
|
||||
# Singleton query (not a real field).
|
||||
elif key.lower() == 'singleton':
|
||||
subqueries.append(SingletonQuery(util.str2bool(pattern)))
|
||||
if not subqueries: # no terms in query
|
||||
|
||||
if not subqueries: # No terms in query.
|
||||
subqueries = [TrueQuery()]
|
||||
return cls(subqueries)
|
||||
|
||||
|
|
@ -890,19 +909,21 @@ class Library(BaseLibrary):
|
|||
|
||||
self.timeout = timeout
|
||||
self.conn = sqlite3.connect(self.path, timeout)
|
||||
# This way we can access our SELECT results like dictionaries.
|
||||
self.conn.row_factory = sqlite3.Row
|
||||
# this way we can access our SELECT results like dictionaries
|
||||
|
||||
# Add REGEXP function to SQLite queries.
|
||||
def regexp(expr, item):
|
||||
if item == None:
|
||||
def regexp(expr, val):
|
||||
if val is None or expr is None:
|
||||
return False
|
||||
if not isinstance(val, basestring):
|
||||
val = unicode(val)
|
||||
try:
|
||||
reg = re.compile(expr)
|
||||
res = reg.search(item)
|
||||
return res is not None
|
||||
except:
|
||||
res = re.search(expr, val)
|
||||
except re.error:
|
||||
# Invalid regular expression.
|
||||
return False
|
||||
return res is not None
|
||||
self.conn.create_function("REGEXP", 2, regexp)
|
||||
|
||||
self._make_table('items', item_fields)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,9 @@ Changelog
|
|||
The new ``%aunique{}`` template function, which is included in the default
|
||||
path formats, ensures that Crystal Castles' albums will be placed into
|
||||
different directories. See :ref:`aunique` for details.
|
||||
* Beets queries can now use **regular expressions**. Use an additional ``:`` in
|
||||
your query to enable regex matching. See :ref:`regex` for the full details.
|
||||
Thanks to Matteo Mecucci.
|
||||
* Artist **sort names** are now fetched from MusicBrainz. There are two new data
|
||||
fields, ``artist_sort`` and ``albumartist_sort``, that contain sortable artist
|
||||
names like "Beatles, The". These fields are also used to sort albums and items
|
||||
|
|
|
|||
|
|
@ -79,6 +79,33 @@ backslashes are not part of beets' syntax; I'm just using the escaping
|
|||
functionality of my shell (bash or zsh, for instance) to pass ``the rebel`` as a
|
||||
single argument instead of two.
|
||||
|
||||
.. _regex:
|
||||
|
||||
Regular Expressions
|
||||
-------------------
|
||||
|
||||
While ordinary keywords perform simple substring matches, beets also supports
|
||||
regular expression matching for more advanced queries. To run a regex query, use
|
||||
an additional ``:`` between the field name and the expression::
|
||||
|
||||
$ beet list 'artist::Ann(a|ie)'
|
||||
|
||||
That query finds songs by Anna Calvi and Annie but not Annuals. Similarly, this
|
||||
query prints the path to any file in my library that's missing a track title::
|
||||
|
||||
$ beet list -p title::^$
|
||||
|
||||
To search *all* fields using a regular expression, just prefix the expression
|
||||
with a single ``:``, like so::
|
||||
|
||||
$ beet list :Ho[pm]eless
|
||||
|
||||
Regular expressions are case-sensitive and build on `Python's built-in
|
||||
implementation`_. See Python's documentation for specifics on regex syntax.
|
||||
|
||||
.. _Python's built-in implementation: http://docs.python.org/library/re.html
|
||||
|
||||
|
||||
Path Queries
|
||||
------------
|
||||
|
||||
|
|
@ -97,19 +124,3 @@ equivalent::
|
|||
Note that this only matches items that are *already in your library*, so a path
|
||||
query won't necessarily find *all* the audio files in a directory---just the
|
||||
ones you've already added to your beets library.
|
||||
|
||||
Future Work
|
||||
-----------
|
||||
|
||||
Here are a few things that the query syntax should eventually support but aren't
|
||||
yet implemented. Please drop me a line if you have other ideas.
|
||||
|
||||
* "Null" queries. It's currently impossible to query for items that have an
|
||||
empty artist. Perhaps the syntax should look like ``artist:NULL`` or
|
||||
``artist:EMPTY``.
|
||||
|
||||
* Regular expressions. Beets queries are based on simple case-insensitive
|
||||
substring matching, but regexes might be useful occasionally as well. Maybe
|
||||
the syntax should look something like ``re:artist:^.*$`` or, perhaps,
|
||||
``artist:/^.*$/``. Having regular expressions could help with null queries
|
||||
(above): ``re:artist:^$``.
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class AnySubstringQueryTest(unittest.TestCase):
|
|||
def test_restriction_completeness(self):
|
||||
q = beets.library.AnySubstringQuery('title', ['title'])
|
||||
self.assertEqual(self.lib.items(q).next().title, 'the title')
|
||||
|
||||
|
||||
def test_restriction_soundness(self):
|
||||
q = beets.library.AnySubstringQuery('title', ['artist'])
|
||||
self.assertRaises(StopIteration, self.lib.items(q).next)
|
||||
|
|
@ -98,7 +98,6 @@ class AnyRegexpQueryTest(unittest.TestCase):
|
|||
q = beets.library.AnyRegexpQuery(r'the ti$', ['title'])
|
||||
self.assertRaises(StopIteration, self.lib.items(q).next)
|
||||
|
||||
|
||||
# Convenient asserts for matching items.
|
||||
class AssertsMixin(object):
|
||||
def assert_matched(self, result_iterator, title):
|
||||
|
|
@ -111,7 +110,7 @@ class AssertsMixin(object):
|
|||
self.assert_matched(result_iterator, 'Lovers Who Uncover')
|
||||
self.assert_matched(result_iterator, 'Boracay')
|
||||
self.assert_done(result_iterator)
|
||||
|
||||
|
||||
class GetTest(unittest.TestCase, AssertsMixin):
|
||||
def setUp(self):
|
||||
self.lib = beets.library.Library(
|
||||
|
|
|
|||
Loading…
Reference in a new issue