Simplify word boundaries (#1060)

Use lookbehind/lookahead assertions to require whitespace on both sides of the
token. This replaces the use of \b, which doesn't work for tokens that end in
punctuation, such as "ft.".
This commit is contained in:
Adrian Sampson 2014-12-16 11:49:54 +00:00
parent a984c1aa44
commit c2c1e7236d
2 changed files with 5 additions and 7 deletions

View file

@@ -686,12 +686,9 @@ def feat_tokens(for_artist=True):
The `for_artist` option determines whether the regex should be
suitable for matching artist fields (the default) or title fields.
"""
feat_special_chars = ['&', 'feat.', 'ft.']
feat_words = ['ft', 'featuring', 'feat']
if for_artist: # appending to artist name enables more tokens
feat_words += ['with', 'vs', 'and', 'con']
regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words])
if for_artist:
regex = r'%s|%s' % \
('|'.join([re.escape(x) for x in feat_special_chars]), regex)
return '(?:{0})'.format(regex)
feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.']
return '(?<=\s)(?:{0})(?=\s)'.format(
'|'.join(re.escape(x) for x in feat_words)
)

View file

@@ -50,6 +50,7 @@ class FtInTitlePluginTest(unittest.TestCase):
self.assertTrue(ftintitle.contains_feat('Alice and Bob'))
self.assertTrue(ftintitle.contains_feat('Alice With Bob'))
self.assertFalse(ftintitle.contains_feat('Alice defeat Bob'))
self.assertFalse(ftintitle.contains_feat('Aliceft.Bob'))
def suite():