feat_tokens: change argument name, fix regex flag

This commit is contained in:
Fabrice Laporte 2014-12-14 22:46:51 +01:00
parent 82de2a55bc
commit b62f15d9d9
3 changed files with 9 additions and 14 deletions

View file

@@ -680,15 +680,16 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
return limit
def feat_tokens(extended=False):
def feat_tokens(for_artist=True):
"""Returns the tokens to use to detect featuring artists in strings."""
FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.']
FEAT_WORDS = ['ft', 'featuring', 'feat']
if extended:
if for_artist: # appending to artist name enables more tokens
FEAT_WORDS += ['with', 'vs', 'and', 'con']
regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS])
if extended:
if for_artist:
regex = r'(%s|%s)' % \
('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex)
return regex

View file

@@ -31,11 +31,8 @@ def split_on_feat(artist):
may be a string or None if none is present.
"""
# split on the first "feat".
feat_tokens(extended=True).strip('()')
parts = re.split(
feat_tokens(extended=True).translate(None, '()'),
artist, 1, flags=re.IGNORECASE)
parts = [s.strip() for s in parts]
regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE)
parts = [s.strip() for s in regex.split(artist, 1)]
if len(parts) == 1:
return parts[0], None
else:
@@ -45,10 +42,7 @@ def split_on_feat(artist):
def contains_feat(title):
"""Determine whether the title contains a "featured" marker.
"""
return bool(re.search(
feat_tokens(extended=True),
title, flags=re.IGNORECASE
))
return bool(re.search(feat_tokens(), title, flags=re.IGNORECASE))
def update_metadata(item, feat_part, drop_feat):

View file

@@ -138,7 +138,7 @@ def search_pairs(item):
artists = [artist]
# Remove any featuring artists from the artists name
pattern = r"(.*?) %s" % feat_tokens(extended=True)
pattern = r"(.*?) %s" % feat_tokens()
match = re.search(pattern, artist, re.IGNORECASE)
if match:
artists.append(match.group(1))
@@ -151,7 +151,7 @@ def search_pairs(item):
titles.append(match.group(1))
# Remove any featuring artists from the title
pattern = r"(.*?) %s" % feat_tokens()
pattern = r"(.*?) %s" % feat_tokens(for_artist=False)
for title in titles[:]:
match = re.search(pattern, title, re.IGNORECASE)
if match: