From c2c1e7236d29505b7c11a63754a0c3f6bd4ffb1d Mon Sep 17 00:00:00 2001
From: Adrian Sampson
Date: Tue, 16 Dec 2014 11:49:54 +0000
Subject: [PATCH] Simplify word boundaries (#1060)

Use lookahead/lookbehind matching to ensure there is whitespace around
the token. Replaces the use of \b, which doesn't work for "ft.", etc.
---
 beets/util/__init__.py | 11 ++++-------
 test/test_ftintitle.py |  1 +
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index a8b13f140..188df8cc3 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -686,12 +686,9 @@ def feat_tokens(for_artist=True):
     The `for_artist` option determines whether the regex should be
     suitable for matching artist fields (the default) or title fields.
     """
-    feat_special_chars = ['&', 'feat.', 'ft.']
     feat_words = ['ft', 'featuring', 'feat']
-    if for_artist:  # appending to artist name enables more tokens
-        feat_words += ['with', 'vs', 'and', 'con']
-    regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words])
     if for_artist:
-        regex = r'%s|%s' % \
-            ('|'.join([re.escape(x) for x in feat_special_chars]), regex)
-    return '(?:{0})'.format(regex)
+        feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.']
+    return '(?<=\s)(?:{0})(?=\s)'.format(
+        '|'.join(re.escape(x) for x in feat_words)
+    )
diff --git a/test/test_ftintitle.py b/test/test_ftintitle.py
index 2637dbaf9..77e416c5a 100644
--- a/test/test_ftintitle.py
+++ b/test/test_ftintitle.py
@@ -50,6 +50,7 @@ class FtInTitlePluginTest(unittest.TestCase):
         self.assertTrue(ftintitle.contains_feat('Alice and Bob'))
         self.assertTrue(ftintitle.contains_feat('Alice With Bob'))
         self.assertFalse(ftintitle.contains_feat('Alice defeat Bob'))
+        self.assertFalse(ftintitle.contains_feat('Aliceft.Bob'))
 
 
 def suite():