From eae98aff0e6af64810e67302baeb808ab6fe2b48 Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Tue, 17 Feb 2015 12:19:21 +0100 Subject: [PATCH 1/8] PathQuery is case-{,in}sensitive on {UNIX,Windows} PathQuery use LIKE on Windows and instr() = 1 on UNIX. Fix #1165. --- beets/library.py | 27 +++++++++++++++++++++------ test/test_query.py | 13 +++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/beets/library.py b/beets/library.py index 8d95561f7..35b763321 100644 --- a/beets/library.py +++ b/beets/library.py @@ -24,6 +24,7 @@ import unicodedata import time import re from unidecode import unidecode +import platform from beets import logging from beets.mediafile import MediaFile, MutagenError, UnreadableFileError @@ -42,30 +43,44 @@ log = logging.getLogger('beets') # Library-specific query types. class PathQuery(dbcore.FieldQuery): - """A query that matches all items under a given path.""" + """A query that matches all items under a given path. + + On Windows paths are case-insensitive, contratly to UNIX platforms. + """ escape_re = re.compile(r'[\\_%]') escape_char = b'\\' + _is_windows = platform.system() == 'Windows' + def __init__(self, field, pattern, fast=True): super(PathQuery, self).__init__(field, pattern, fast) + if self._is_windows: + pattern = pattern.lower() + # Match the path as a single file. self.file_path = util.bytestring_path(util.normpath(pattern)) # As a directory (prefix). self.dir_path = util.bytestring_path(os.path.join(self.file_path, b'')) def match(self, item): - return (item.path == self.file_path) or \ - item.path.startswith(self.dir_path) + path = item.path.lower() if self._is_windows else item.path + return (path == self.file_path) or path.startswith(self.dir_path) def col_clause(self): + file_blob = buffer(self.file_path) + + if not self._is_windows: + dir_blob = buffer(self.dir_path) + return '({0} = ?) || (instr({0}, ?) = 1)'.format(self.field), \ + (file_blob, dir_blob) + escape = lambda m: self.escape_char + m.group(0) dir_pattern = self.escape_re.sub(escape, self.dir_path) - dir_pattern = buffer(dir_pattern + b'%') - file_blob = buffer(self.file_path) + dir_blob = buffer(dir_pattern + b'%') return '({0} = ?) || ({0} LIKE ? ESCAPE ?)'.format(self.field), \ - (file_blob, dir_pattern, self.escape_char) + (file_blob, dir_blob, self.escape_char) # Library-specific field types. diff --git a/test/test_query.py b/test/test_query.py index a9b1058bd..c6aec6185 100644 --- a/test/test_query.py +++ b/test/test_query.py @@ -17,6 +17,8 @@ from __future__ import (division, absolute_import, print_function, unicode_literals) +from mock import patch + from test import _common from test._common import unittest from test import helper @@ -461,6 +463,17 @@ class PathQueryTest(_common.LibTestCase, TestHelper, AssertsMixin): results = self.lib.albums(q) self.assert_albums_matched(results, ['album with backslash']) + def test_case_sensitivity(self): + self.add_album(path='/A/B/C2.mp3', title='caps path') + q = b'path:/A/B' + with patch('beets.library.PathQuery._is_windows', False): + results = self.lib.items(q) + self.assert_items_matched(results, ['caps path']) + + with patch('beets.library.PathQuery._is_windows', True): + results = self.lib.items(q) + self.assert_items_matched(results, ['path item', 'caps path']) + class IntQueryTest(unittest.TestCase, TestHelper): From 83e34322e911ab3e3a74751442edea36ab31ef7b Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Tue, 17 Feb 2015 13:13:30 +0100 Subject: [PATCH 2/8] Update changelog & docs --- docs/changelog.rst | 1 + docs/reference/query.rst | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 329770102..6b9c57781 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -65,6 +65,7 @@ Core changes: Fixes: +* Path queries are case-sensitive on UNIX OSes. :bug:`1165` * :doc:`/plugins/lyrics`: Silence a warning about insecure requests in the new MusixMatch backend. :bug:`1204` * Fix a crash when ``beet`` is invoked without arguments. :bug:`1205` diff --git a/docs/reference/query.rst b/docs/reference/query.rst index 7dc79461a..af676a50d 100644 --- a/docs/reference/query.rst +++ b/docs/reference/query.rst @@ -184,6 +184,8 @@ Note that this only matches items that are *already in your library*, so a path query won't necessarily find *all* the audio files in a directory---just the ones you've already added to your beets library. +Such queries are case-sensitive on UNIX and case-insensitive on Microsoft +Windows. .. _query-sort: From 6fc678e9470e1469ba93b5011867ee29d406df82 Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Wed, 18 Feb 2015 18:52:22 +0100 Subject: [PATCH 3/8] PathQuery: use substr() instead of instr() substr() is only available in SQLite 3.7.15+, which is not available yet on Debian stable, CentOS & co. Use substr() instead. --- beets/library.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/library.py b/beets/library.py index 35b763321..c5503dd0a 100644 --- a/beets/library.py +++ b/beets/library.py @@ -73,8 +73,8 @@ class PathQuery(dbcore.FieldQuery): if not self._is_windows: dir_blob = buffer(self.dir_path) - return '({0} = ?) || (instr({0}, ?) = 1)'.format(self.field), \ - (file_blob, dir_blob) + return '({0} = ?) || (substr({0}, 1, ?) = ?)'.format(self.field), \ + (file_blob, len(dir_blob), dir_blob) escape = lambda m: self.escape_char + m.group(0) dir_pattern = self.escape_re.sub(escape, self.dir_path) From e00d7b7ddcecd3696d93e0d22b3b05c0f1733ee4 Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Wed, 18 Feb 2015 19:28:03 +0100 Subject: [PATCH 4/8] PathQuery: simple utf8 comparison Test usqge of SQL's substr() with a UTF8 example. The ideal would be to test with non-UTF8 code points, however it is impossible to perform such a query: queries can only be unicode or utf8. --- test/test_query.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_query.py b/test/test_query.py index c6aec6185..e53efc29f 100644 --- a/test/test_query.py +++ b/test/test_query.py @@ -474,6 +474,12 @@ class PathQueryTest(_common.LibTestCase, TestHelper, AssertsMixin): results = self.lib.items(q) self.assert_items_matched(results, ['path item', 'caps path']) + def test_utf8_bytes(self): + self.add_album(path=b'/\xc3\xa0/b/c.mp3', title='latin byte') + q = b'path:/\xc3\xa0/b/c.mp3' + results = self.lib.items(q) + self.assert_items_matched(results, ['latin byte']) + class IntQueryTest(unittest.TestCase, TestHelper): From 9e5e7a28e5574b45b92e5a282abbd796cbb18b28 Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Wed, 18 Feb 2015 19:31:07 +0100 Subject: [PATCH 5/8] InvalidQueryError: resist to any query Even though queries may not contain non-utf8 code points InvalidQueryError ought to be prudent, for such an invalid query would raise an InvalidQueryError which therefore has to be able to manipulate the invalid query. --- beets/dbcore/query.py | 8 +++++++- test/test_library.py | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/beets/dbcore/query.py b/beets/dbcore/query.py index cd891148e..e80010ccf 100644 --- a/beets/dbcore/query.py +++ b/beets/dbcore/query.py @@ -37,7 +37,13 @@ class InvalidQueryError(ParsingError): def __init__(self, query, explanation): if isinstance(query, list): query = " ".join(query) - message = "'{0}': {1}".format(query, explanation) + try: + message = "'{0}': {1}".format(query, explanation) + except UnicodeDecodeError: + # queries are unicode. however if for an unholy reason it's not + # the case, an InvalidQueryError may be raised -- and report it + # correctly than fail again here + message = "{0!r}: {1}".format(query, explanation) super(InvalidQueryError, self).__init__(message) diff --git a/test/test_library.py b/test/test_library.py index 6bb88076e..d2193b25f 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -1195,6 +1195,13 @@ class ParseQueryTest(unittest.TestCase): self.assertIsInstance(raised.exception, beets.dbcore.query.ParsingError) + def test_parse_byte_string(self): + with self.assertRaises(beets.dbcore.InvalidQueryError) as raised: + beets.library.parse_query_string(b'f\xf2o', None) + self.assertIn("can't decode", unicode(raised.exception)) + self.assertIsInstance(raised.exception, + beets.dbcore.query.ParsingError) + def suite(): return unittest.TestLoader().loadTestsFromName(__name__) From 952081e5edf6f87fec85537c7d14e4e02ae02149 Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Sun, 1 Mar 2015 14:52:31 +0100 Subject: [PATCH 6/8] Revert "InvalidQueryError: resist to any query" This reverts commit 9e5e7a28e5574b45b92e5a282abbd796cbb18b28. --- beets/dbcore/query.py | 8 +------- test/test_library.py | 7 ------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/beets/dbcore/query.py b/beets/dbcore/query.py index e80010ccf..cd891148e 100644 --- a/beets/dbcore/query.py +++ b/beets/dbcore/query.py @@ -37,13 +37,7 @@ class InvalidQueryError(ParsingError): def __init__(self, query, explanation): if isinstance(query, list): query = " ".join(query) - try: - message = "'{0}': {1}".format(query, explanation) - except UnicodeDecodeError: - # queries are unicode. however if for an unholy reason it's not - # the case, an InvalidQueryError may be raised -- and report it - # correctly than fail again here - message = "{0!r}: {1}".format(query, explanation) + message = "'{0}': {1}".format(query, explanation) super(InvalidQueryError, self).__init__(message) diff --git a/test/test_library.py b/test/test_library.py index d2193b25f..6bb88076e 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -1195,13 +1195,6 @@ class ParseQueryTest(unittest.TestCase): self.assertIsInstance(raised.exception, beets.dbcore.query.ParsingError) - def test_parse_byte_string(self): - with self.assertRaises(beets.dbcore.InvalidQueryError) as raised: - beets.library.parse_query_string(b'f\xf2o', None) - self.assertIn("can't decode", unicode(raised.exception)) - self.assertIsInstance(raised.exception, - beets.dbcore.query.ParsingError) - def suite(): return unittest.TestLoader().loadTestsFromName(__name__) From cb504ad163ec81348c2b30c5d169a7cb9f448495 Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Sun, 1 Mar 2015 14:57:10 +0100 Subject: [PATCH 7/8] library.parse_query_string: assert query is unicode --- beets/library.py | 5 +++-- test/test_library.py | 4 ++++ test/test_query.py | 8 +------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/beets/library.py b/beets/library.py index c5503dd0a..0c1acba22 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1107,11 +1107,12 @@ def parse_query_string(s, model_cls): The string is split into components using shell-like syntax. """ + assert isinstance(s, unicode), "Query is not unicode: {0!r}".format(s) + # A bug in Python < 2.7.3 prevents correct shlex splitting of # Unicode strings. # http://bugs.python.org/issue6988 - if isinstance(s, unicode): - s = s.encode('utf8') + s = s.encode('utf8') try: parts = [p.decode('utf8') for p in shlex.split(s)] except ValueError as exc: diff --git a/test/test_library.py b/test/test_library.py index 6bb88076e..9ec43c2bf 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -1195,6 +1195,10 @@ class ParseQueryTest(unittest.TestCase): self.assertIsInstance(raised.exception, beets.dbcore.query.ParsingError) + def test_parse_bytes(self): + with self.assertRaises(AssertionError): + beets.library.parse_query_string(b"query", None) + def suite(): return unittest.TestLoader().loadTestsFromName(__name__) diff --git a/test/test_query.py b/test/test_query.py index e53efc29f..2511aa841 100644 --- a/test/test_query.py +++ b/test/test_query.py @@ -465,7 +465,7 @@ class PathQueryTest(_common.LibTestCase, TestHelper, AssertsMixin): def test_case_sensitivity(self): self.add_album(path='/A/B/C2.mp3', title='caps path') - q = b'path:/A/B' + q = 'path:/A/B' with patch('beets.library.PathQuery._is_windows', False): results = self.lib.items(q) self.assert_items_matched(results, ['caps path']) @@ -474,12 +474,6 @@ class PathQueryTest(_common.LibTestCase, TestHelper, AssertsMixin): results = self.lib.items(q) self.assert_items_matched(results, ['path item', 'caps path']) - def test_utf8_bytes(self): - self.add_album(path=b'/\xc3\xa0/b/c.mp3', title='latin byte') - q = b'path:/\xc3\xa0/b/c.mp3' - results = self.lib.items(q) - self.assert_items_matched(results, ['latin byte']) - class IntQueryTest(unittest.TestCase, TestHelper): From 9efcfbb8fa32c1a4ac2a2147a6250414f0d53d0c Mon Sep 17 00:00:00 2001 From: Bruno Cauet Date: Sun, 1 Mar 2015 18:10:07 +0100 Subject: [PATCH 8/8] PathQuery: add 'case_sensitivity' param - fully tested - default value is platform-aware --- beets/library.py | 16 +++++++++++----- test/test_query.py | 20 +++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/beets/library.py b/beets/library.py index 0c1acba22..b6d8f4c07 100644 --- a/beets/library.py +++ b/beets/library.py @@ -45,7 +45,8 @@ log = logging.getLogger('beets') class PathQuery(dbcore.FieldQuery): """A query that matches all items under a given path. - On Windows paths are case-insensitive, contratly to UNIX platforms. + On Windows paths are case-insensitive by default, contrarly to UNIX + platforms. """ escape_re = re.compile(r'[\\_%]') @@ -53,11 +54,16 @@ class PathQuery(dbcore.FieldQuery): _is_windows = platform.system() == 'Windows' - def __init__(self, field, pattern, fast=True): + def __init__(self, field, pattern, fast=True, case_sensitive=None): super(PathQuery, self).__init__(field, pattern, fast) - if self._is_windows: + if case_sensitive is None: + # setting this value as the default one would make it un-patchable + # and therefore un-testable + case_sensitive = not self._is_windows + if not case_sensitive: pattern = pattern.lower() + self.case_sensitive = case_sensitive # Match the path as a single file. self.file_path = util.bytestring_path(util.normpath(pattern)) @@ -65,13 +71,13 @@ class PathQuery(dbcore.FieldQuery): self.dir_path = util.bytestring_path(os.path.join(self.file_path, b'')) def match(self, item): - path = item.path.lower() if self._is_windows else item.path + path = item.path if self.case_sensitive else item.path.lower() return (path == self.file_path) or path.startswith(self.dir_path) def col_clause(self): file_blob = buffer(self.file_path) - if not self._is_windows: + if self.case_sensitive: dir_blob = buffer(self.dir_path) return '({0} = ?) || (substr({0}, 1, ?) = ?)'.format(self.field), \ (file_blob, len(dir_blob), dir_blob) diff --git a/test/test_query.py b/test/test_query.py index 2511aa841..ee0f3d0ba 100644 --- a/test/test_query.py +++ b/test/test_query.py @@ -17,6 +17,7 @@ from __future__ import (division, absolute_import, print_function, unicode_literals) +from functools import partial from mock import patch from test import _common @@ -465,14 +466,23 @@ class PathQueryTest(_common.LibTestCase, TestHelper, AssertsMixin): def test_case_sensitivity(self): self.add_album(path='/A/B/C2.mp3', title='caps path') - q = 'path:/A/B' + + makeq = partial(beets.library.PathQuery, 'path', '/A/B') + + results = self.lib.items(makeq(case_sensitive=True)) + self.assert_items_matched(results, ['caps path']) + + results = self.lib.items(makeq(case_sensitive=False)) + self.assert_items_matched(results, ['path item', 'caps path']) + + # test platform-aware default sensitivity with patch('beets.library.PathQuery._is_windows', False): - results = self.lib.items(q) - self.assert_items_matched(results, ['caps path']) + q = makeq() + self.assertEqual(q.case_sensitive, True) with patch('beets.library.PathQuery._is_windows', True): - results = self.lib.items(q) - self.assert_items_matched(results, ['path item', 'caps path']) + q = makeq() + self.assertEqual(q.case_sensitive, False) class IntQueryTest(unittest.TestCase, TestHelper):