Merge pull request #1728 from diego-plan9/notquery

Add boolean "not" query operator
This commit is contained in:
Adrian Sampson 2015-11-20 10:00:05 -08:00
commit 6cb8db3b2e
6 changed files with 331 additions and 17 deletions

View file

@ -447,6 +447,36 @@ class OrQuery(MutableCollectionQuery):
return any([q.match(item) for q in self.subqueries])
class NotQuery(Query):
"""A query that matches the negation of its `subquery`, as a shorcut for
performing `not(subquery)` without using regular expressions.
"""
def __init__(self, subquery):
self.subquery = subquery
def clause(self):
clause, subvals = self.subquery.clause()
if clause:
return 'not ({0})'.format(clause), subvals
else:
# If there is no clause, there is nothing to negate. All the logic
# is handled by match() for slow queries.
return clause, subvals
def match(self, item):
return not self.subquery.match(item)
def __repr__(self):
return "{0.__class__.__name__}({0.subquery})".format(self)
def __eq__(self, other):
return super(NotQuery, self).__eq__(other) and \
self.subquery == other.subquery
def __hash__(self):
return hash(('not', hash(self.subquery)))
class TrueQuery(Query):
"""A query that always matches."""
def clause(self):

View file

@ -24,6 +24,8 @@ import beets
PARSE_QUERY_PART_REGEX = re.compile(
# Non-capturing optional segment for the keyword.
r'(-|\^)?' # Negation prefixes.
r'(?:'
r'(\S+?)' # The field key.
r'(?<!\\):' # Unescaped :
@ -38,9 +40,9 @@ PARSE_QUERY_PART_REGEX = re.compile(
def parse_query_part(part, query_classes={}, prefixes={},
default_class=query.SubstringQuery):
"""Take a query in the form of a key/value pair separated by a
colon and return a tuple of `(key, value, cls)`. `key` may be None,
colon and return a tuple of `(key, value, cls, negate)`. `key` may be None,
indicating that any field may be matched. `cls` is a subclass of
`FieldQuery`.
`FieldQuery`. `negate` is a boolean indicating if the query is negated.
The optional `query_classes` parameter maps field names to default
query types; `default_class` is the fallback. `prefixes` is a map
@ -54,10 +56,11 @@ def parse_query_part(part, query_classes={}, prefixes={},
class is available, `default_class` is used.
For instance,
'stapler' -> (None, 'stapler', SubstringQuery)
'color:red' -> ('color', 'red', SubstringQuery)
':^Quiet' -> (None, '^Quiet', RegexpQuery)
'color::b..e' -> ('color', 'b..e', RegexpQuery)
'stapler' -> (None, 'stapler', SubstringQuery, False)
'color:red' -> ('color', 'red', SubstringQuery, False)
':^Quiet' -> (None, '^Quiet', RegexpQuery, False)
'color::b..e' -> ('color', 'b..e', RegexpQuery, False)
'-color:red' -> ('color', 'red', SubstringQuery, True)
Prefixes may be "escaped" with a backslash to disable the keying
behavior.
@ -65,18 +68,19 @@ def parse_query_part(part, query_classes={}, prefixes={},
part = part.strip()
match = PARSE_QUERY_PART_REGEX.match(part)
assert match # Regex should always match.
key = match.group(1)
term = match.group(2).replace('\:', ':')
assert match # Regex should always match
negate = bool(match.group(1))
key = match.group(2)
term = match.group(3).replace('\:', ':')
# Match the search term against the list of prefixes.
for pre, query_class in prefixes.items():
if term.startswith(pre):
return key, term[len(pre):], query_class
return key, term[len(pre):], query_class, negate
# No matching prefix: use type-based or fallback/default query.
query_class = query_classes.get(key, default_class)
return key, term, query_class
return key, term, query_class, negate
def construct_query_part(model_cls, prefixes, query_part):
@ -94,7 +98,7 @@ def construct_query_part(model_cls, prefixes, query_part):
query_classes[k] = t.query
# Parse the string.
key, pattern, query_class = \
key, pattern, query_class, negate = \
parse_query_part(query_part, query_classes, prefixes)
# No key specified.
@ -103,14 +107,24 @@ def construct_query_part(model_cls, prefixes, query_part):
# The query type matches a specific field, but none was
# specified. So we use a version of the query that matches
# any field.
return query.AnyFieldQuery(pattern, model_cls._search_fields,
query_class)
q = query.AnyFieldQuery(pattern, model_cls._search_fields,
query_class)
if negate:
return query.NotQuery(q)
else:
return q
else:
# Other query type.
return query_class(pattern)
if negate:
return query.NotQuery(query_class(pattern))
else:
return query_class(pattern)
key = key.lower()
return query_class(key.lower(), pattern, key in model_cls._fields)
q = query_class(key.lower(), pattern, key in model_cls._fields)
if negate:
return query.NotQuery(q)
return q
def query_from_strings(query_cls, model_cls, prefixes, query_parts):

View file

@ -20,6 +20,10 @@ New:
:doc:`/plugins/discogs` also adopts the same setting.
* :doc:`/plugins/embyupdate`: A plugin to trigger a library refresh on a
`Emby Server`_ if database changed.
* Queries can now use "not" logic: if you prepend a query term with "-" or
"^", items or albums matching that term will be excluded from the results.
For example, ``beet ls foo ^artist:bar`` will get all the items matching
`foo` but whose artist do not match `bar`. See :ref:`not_query`. :bug:`819`
For developers:

View file

@ -181,6 +181,35 @@ Find all items with a file modification time between 2008-12-01 and
$ beet ls 'mtime:2008-12-01..2008-12-02'
.. _not_query:
Query Term Negation
-------------------
Query terms can also be negated, acting like a Boolean "not", by prepending
them with ``-`` or ``^``. This has the effect of returning all the items that
do **not** match the query term. For example, this command::
$ beet list ^love
matches all the songs in the library that do not have "love" in any of their
fields.
Negation can be combined with the rest of the query mechanisms, allowing to
negate specific fields, regular expressions, etc. For example, this command::
$ beet list -a artist:dylan ^year:1980..1990 "^album::the(y)?"
matches all the albums with an artist containing "dylan", but excluding those
released on the eighties and those that have "the" or "they" on the title.
Note that the ``-`` character is treated by most shells as a reserved character
for passing arguments, and as such needs to be escaped if using it for query
negation. In most UNIX derivatives shells, using a double dash ``--``
(indicating that everything after that point should not be treated as
arguments) before the query terms should prevent conflicts, such as::
$ beet list -a -- artist:dylan -year:1980..1990 "-album::the(y)?"
.. _pathquery:

View file

@ -352,7 +352,7 @@ class QueryParseTest(unittest.TestCase):
part,
{'year': dbcore.query.NumericQuery},
{':': dbcore.query.RegexpQuery},
)
)[:-1] # remove the negate flag
def test_one_basic_term(self):
q = 'test'

View file

@ -715,6 +715,243 @@ class NoneQueryTest(unittest.TestCase, TestHelper):
self.assertInResult(item, matched)
class NotQueryMatchTest(_common.TestCase):
"""Test `query.NotQuery` matching against a single item, using the same
cases and assertions as on `MatchTest`, plus assertion on the negated
queries (ie. assertTrue(q) -> assertFalse(NotQuery(q))).
"""
def setUp(self):
super(NotQueryMatchTest, self).setUp()
self.item = _common.item()
def test_regex_match_positive(self):
q = dbcore.query.RegexpQuery('album', '^the album$')
self.assertTrue(q.match(self.item))
self.assertFalse(dbcore.query.NotQuery(q).match(self.item))
def test_regex_match_negative(self):
q = dbcore.query.RegexpQuery('album', '^album$')
self.assertFalse(q.match(self.item))
self.assertTrue(dbcore.query.NotQuery(q).match(self.item))
def test_regex_match_non_string_value(self):
q = dbcore.query.RegexpQuery('disc', '^6$')
self.assertTrue(q.match(self.item))
self.assertFalse(dbcore.query.NotQuery(q).match(self.item))
def test_substring_match_positive(self):
q = dbcore.query.SubstringQuery('album', 'album')
self.assertTrue(q.match(self.item))
self.assertFalse(dbcore.query.NotQuery(q).match(self.item))
def test_substring_match_negative(self):
q = dbcore.query.SubstringQuery('album', 'ablum')
self.assertFalse(q.match(self.item))
self.assertTrue(dbcore.query.NotQuery(q).match(self.item))
def test_substring_match_non_string_value(self):
q = dbcore.query.SubstringQuery('disc', '6')
self.assertTrue(q.match(self.item))
self.assertFalse(dbcore.query.NotQuery(q).match(self.item))
def test_year_match_positive(self):
q = dbcore.query.NumericQuery('year', '1')
self.assertTrue(q.match(self.item))
self.assertFalse(dbcore.query.NotQuery(q).match(self.item))
def test_year_match_negative(self):
q = dbcore.query.NumericQuery('year', '10')
self.assertFalse(q.match(self.item))
self.assertTrue(dbcore.query.NotQuery(q).match(self.item))
def test_bitrate_range_positive(self):
q = dbcore.query.NumericQuery('bitrate', '100000..200000')
self.assertTrue(q.match(self.item))
self.assertFalse(dbcore.query.NotQuery(q).match(self.item))
def test_bitrate_range_negative(self):
q = dbcore.query.NumericQuery('bitrate', '200000..300000')
self.assertFalse(q.match(self.item))
self.assertTrue(dbcore.query.NotQuery(q).match(self.item))
def test_open_range(self):
q = dbcore.query.NumericQuery('bitrate', '100000..')
dbcore.query.NotQuery(q)
class NotQueryTest(DummyDataTestCase):
"""Test `query.NotQuery` against the dummy data:
- `test_type_xxx`: tests for the negation of a particular XxxQuery class.
- `test_get_yyy`: tests on query strings (similar to `GetTest`)
"""
def assertNegationProperties(self, q):
"""Given a Query `q`, assert that:
- q OR not(q) == all items
- q AND not(q) == 0
- not(not(q)) == q
"""
not_q = dbcore.query.NotQuery(q)
# assert using OrQuery, AndQuery
q_or = dbcore.query.OrQuery([q, not_q])
q_and = dbcore.query.AndQuery([q, not_q])
self.assert_items_matched_all(self.lib.items(q_or))
self.assert_items_matched(self.lib.items(q_and), [])
# assert manually checking the item titles
all_titles = set([i.title for i in self.lib.items()])
q_results = set([i.title for i in self.lib.items(q)])
not_q_results = set([i.title for i in self.lib.items(not_q)])
self.assertEqual(q_results.union(not_q_results), all_titles)
self.assertEqual(q_results.intersection(not_q_results), set())
# round trip
not_not_q = dbcore.query.NotQuery(not_q)
self.assertEqual(set([i.title for i in self.lib.items(q)]),
set([i.title for i in self.lib.items(not_not_q)]))
def test_type_and(self):
# not(a and b) <-> not(a) or not(b)
q = dbcore.query.AndQuery([dbcore.query.BooleanQuery('comp', True),
dbcore.query.NumericQuery('year', '2002')])
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['foo bar', 'beets 4 eva'])
self.assertNegationProperties(q)
def test_type_anyfield(self):
q = dbcore.query.AnyFieldQuery('foo', ['title', 'artist', 'album'],
dbcore.query.SubstringQuery)
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['baz qux'])
self.assertNegationProperties(q)
def test_type_boolean(self):
q = dbcore.query.BooleanQuery('comp', True)
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['beets 4 eva'])
self.assertNegationProperties(q)
def test_type_date(self):
q = dbcore.query.DateQuery('mtime', '0.0')
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, [])
self.assertNegationProperties(q)
def test_type_false(self):
q = dbcore.query.FalseQuery()
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched_all(not_results)
self.assertNegationProperties(q)
def test_type_match(self):
q = dbcore.query.MatchQuery('year', '2003')
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['foo bar', 'baz qux'])
self.assertNegationProperties(q)
def test_type_none(self):
q = dbcore.query.NoneQuery('rg_track_gain')
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, [])
self.assertNegationProperties(q)
def test_type_numeric(self):
q = dbcore.query.NumericQuery('year', '2001..2002')
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['beets 4 eva'])
self.assertNegationProperties(q)
def test_type_or(self):
# not(a or b) <-> not(a) and not(b)
q = dbcore.query.OrQuery([dbcore.query.BooleanQuery('comp', True),
dbcore.query.NumericQuery('year', '2002')])
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['beets 4 eva'])
self.assertNegationProperties(q)
def test_type_regexp(self):
q = dbcore.query.RegexpQuery('artist', '^t')
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['foo bar'])
self.assertNegationProperties(q)
def test_type_substring(self):
q = dbcore.query.SubstringQuery('album', 'ba')
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, ['beets 4 eva'])
self.assertNegationProperties(q)
def test_type_true(self):
q = dbcore.query.TrueQuery()
not_results = self.lib.items(dbcore.query.NotQuery(q))
self.assert_items_matched(not_results, [])
self.assertNegationProperties(q)
def test_get_prefixes_keyed(self):
"""Test both negation prefixes on a keyed query."""
q0 = '-title:qux'
q1 = '^title:qux'
results0 = self.lib.items(q0)
results1 = self.lib.items(q1)
self.assert_items_matched(results0, ['foo bar', 'beets 4 eva'])
self.assert_items_matched(results1, ['foo bar', 'beets 4 eva'])
def test_get_prefixes_unkeyed(self):
"""Test both negation prefixes on an unkeyed query."""
q0 = '-qux'
q1 = '^qux'
results0 = self.lib.items(q0)
results1 = self.lib.items(q1)
self.assert_items_matched(results0, ['foo bar', 'beets 4 eva'])
self.assert_items_matched(results1, ['foo bar', 'beets 4 eva'])
def test_get_one_keyed_regexp(self):
q = r'-artist::t.+r'
results = self.lib.items(q)
self.assert_items_matched(results, ['foo bar', 'baz qux'])
def test_get_one_unkeyed_regexp(self):
q = r'-:x$'
results = self.lib.items(q)
self.assert_items_matched(results, ['foo bar', 'beets 4 eva'])
def test_get_multiple_terms(self):
q = 'baz -bar'
results = self.lib.items(q)
self.assert_items_matched(results, ['baz qux'])
def test_get_mixed_terms(self):
q = 'baz -title:bar'
results = self.lib.items(q)
self.assert_items_matched(results, ['baz qux'])
def test_fast_vs_slow(self):
"""Test that the results are the same regardless of the `fast` flag
for negated `FieldQuery`s.
TODO: investigate NoneQuery(fast=False), as it is raising
AttributeError: type object 'NoneQuery' has no attribute 'field'
at NoneQuery.match() (due to being @classmethod, and no self?)
"""
classes = [(dbcore.query.DateQuery, ['mtime', '0.0']),
(dbcore.query.MatchQuery, ['artist', 'one']),
# (dbcore.query.NoneQuery, ['rg_track_gain']),
(dbcore.query.NumericQuery, ['year', '2002']),
(dbcore.query.StringFieldQuery, ['year', '2001']),
(dbcore.query.RegexpQuery, ['album', '^.a']),
(dbcore.query.SubstringQuery, ['title', 'x'])]
for klass, args in classes:
q_fast = dbcore.query.NotQuery(klass(*(args + [True])))
q_slow = dbcore.query.NotQuery(klass(*(args + [False])))
try:
self.assertEqual([i.title for i in self.lib.items(q_fast)],
[i.title for i in self.lib.items(q_slow)])
except NotImplementedError:
# ignore classes that do not provide `fast` implementation
pass
def suite():
return unittest.TestLoader().loadTestsFromName(__name__)