From 91a998df3cb5b994fcb61f43ab004b33641911cc Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Sat, 13 Dec 2014 23:34:50 +0100 Subject: [PATCH 1/8] fix #1060 --- beets/util/__init__.py | 14 ++++++++++++++ beetsplug/ftintitle.py | 16 ++++++++-------- beetsplug/lyrics.py | 7 ++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 529bbb2f3..56134621d 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -678,3 +678,17 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH): return min(res[9], limit) else: return limit + + +def feat_tokens(extended=False): + """Returns the tokens to use to detect featuring artists in strings.""" + + FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.'] + FEAT_WORDS = ['ft', 'featuring', 'feat'] + if extended: + FEAT_WORDS += ['with', 'vs', 'and', 'con'] + regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS]) + if extended: + regex = r'(%s|%s)' % \ + ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex) + return regex diff --git a/beetsplug/ftintitle.py b/beetsplug/ftintitle.py index 5de16f69a..3fc1badb7 100644 --- a/beetsplug/ftintitle.py +++ b/beetsplug/ftintitle.py @@ -16,7 +16,7 @@ """ from beets.plugins import BeetsPlugin from beets import ui -from beets.util import displayable_path +from beets.util import displayable_path, feat_tokens from beets import config import logging import re @@ -30,24 +30,24 @@ def split_on_feat(artist): artist, which is always a string, and the featuring artist, which may be a string or None if none is present. """ + # split on the first "feat". + feat_tokens(extended=True).strip('()') parts = re.split( - r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.|and', - artist, - 1, # Only split on the first "feat". - ) + feat_tokens(extended=True).translate(None, '()'), + artist, 1, flags=re.IGNORECASE) parts = [s.strip() for s in parts] if len(parts) == 1: return parts[0], None else: - return parts + return tuple(parts) def contains_feat(title): """Determine whether the title contains a "featured" marker. """ return bool(re.search( - r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&', - title, + feat_tokens(extended=True), + title, flags=re.IGNORECASE )) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 73821ee22..487628ee6 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -29,6 +29,7 @@ from HTMLParser import HTMLParseError from beets.plugins import BeetsPlugin from beets import ui from beets import config +from beets.util import feat_tokens # Global logger. @@ -137,7 +138,7 @@ def search_pairs(item): artists = [artist] # Remove any featuring artists from the artists name - pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))" + pattern = r"(.*?) %s" % feat_tokens(extended=True) match = re.search(pattern, artist, re.IGNORECASE) if match: artists.append(match.group(1)) @@ -150,8 +151,8 @@ def search_pairs(item): titles.append(match.group(1)) # Remove any featuring artists from the title - pattern = r"(.*?) \b(ft|feat(uring)?)\b" - for title in titles: + pattern = r"(.*?) %s" % feat_tokens() + for title in titles[:]: match = re.search(pattern, title, re.IGNORECASE) if match: titles.append(match.group(1)) From 82de2a55bc0f58f9bf6c2b760ae8501f8c53aed6 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Sat, 13 Dec 2014 23:35:59 +0100 Subject: [PATCH 2/8] ftintitle unit tests --- test/test_ftintitle.py | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 test/test_ftintitle.py diff --git a/test/test_ftintitle.py b/test/test_ftintitle.py new file mode 100644 index 000000000..2637dbaf9 --- /dev/null +++ b/test/test_ftintitle.py @@ -0,0 +1,59 @@ +# This file is part of beets. +# Copyright 2014, Fabrice Laporte. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Tests for the 'ftintitle' plugin.""" + +from _common import unittest +from beetsplug import ftintitle + + +class FtInTitlePluginTest(unittest.TestCase): + def setUp(self): + """Set up configuration""" + ftintitle.FtInTitlePlugin() + + def test_split_on_feat(self): + parts = ftintitle.split_on_feat('Alice ft. Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice feat Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice feat. Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice featuring Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice & Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice and Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice With Bob') + self.assertEqual(parts, ('Alice', 'Bob')) + parts = ftintitle.split_on_feat('Alice defeat Bob') + self.assertEqual(parts, ('Alice defeat Bob', None)) + + def test_contains_feat(self): + self.assertTrue(ftintitle.contains_feat('Alice ft. Bob')) + self.assertTrue(ftintitle.contains_feat('Alice feat. Bob')) + self.assertTrue(ftintitle.contains_feat('Alice feat Bob')) + self.assertTrue(ftintitle.contains_feat('Alice featuring Bob')) + self.assertTrue(ftintitle.contains_feat('Alice & Bob')) + self.assertTrue(ftintitle.contains_feat('Alice and Bob')) + self.assertTrue(ftintitle.contains_feat('Alice With Bob')) + self.assertFalse(ftintitle.contains_feat('Alice defeat Bob')) + + +def suite(): + return unittest.TestLoader().loadTestsFromName(__name__) + +if __name__ == '__main__': + unittest.main(defaultTest='suite') From b62f15d9d911b84301483c7dc1989ded62c9873e Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Sun, 14 Dec 2014 22:46:51 +0100 Subject: [PATCH 3/8] feat_tokens: change argument name, fix regex flag --- beets/util/__init__.py | 7 ++++--- beetsplug/ftintitle.py | 12 +++--------- beetsplug/lyrics.py | 4 ++-- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 56134621d..f1ae2793a 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -680,15 +680,16 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH): return limit -def feat_tokens(extended=False): +def feat_tokens(for_artist=True): """Returns the tokens to use to detect featuring artists in strings.""" FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.'] FEAT_WORDS = ['ft', 'featuring', 'feat'] - if extended: + if for_artist: # appending to artist name enables more tokens FEAT_WORDS += ['with', 'vs', 'and', 'con'] regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS]) - if extended: + if for_artist: regex = r'(%s|%s)' % \ ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex) return regex + diff --git a/beetsplug/ftintitle.py b/beetsplug/ftintitle.py index 3fc1badb7..96d1f54c9 100644 --- a/beetsplug/ftintitle.py +++ b/beetsplug/ftintitle.py @@ -31,11 +31,8 @@ def split_on_feat(artist): may be a string or None if none is present. """ # split on the first "feat". - feat_tokens(extended=True).strip('()') - parts = re.split( - feat_tokens(extended=True).translate(None, '()'), - artist, 1, flags=re.IGNORECASE) - parts = [s.strip() for s in parts] + regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE) + parts = [s.strip() for s in regex.split(artist, 1)] if len(parts) == 1: return parts[0], None else: @@ -45,10 +42,7 @@ def split_on_feat(artist): def contains_feat(title): """Determine whether the title contains a "featured" marker. """ - return bool(re.search( - feat_tokens(extended=True), - title, flags=re.IGNORECASE - )) + return bool(re.search(feat_tokens(), title, flags=re.IGNORECASE)) def update_metadata(item, feat_part, drop_feat): diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 487628ee6..a90e3d645 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -138,7 +138,7 @@ def search_pairs(item): artists = [artist] # Remove any featuring artists from the artists name - pattern = r"(.*?) %s" % feat_tokens(extended=True) + pattern = r"(.*?) %s" % feat_tokens() match = re.search(pattern, artist, re.IGNORECASE) if match: artists.append(match.group(1)) @@ -151,7 +151,7 @@ def search_pairs(item): titles.append(match.group(1)) # Remove any featuring artists from the title - pattern = r"(.*?) %s" % feat_tokens() + pattern = r"(.*?) %s" % feat_tokens(for_artist=False) for title in titles[:]: match = re.search(pattern, title, re.IGNORECASE) if match: From 693e994729f88daebda17242deb1b63f6c486b4d Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Sun, 14 Dec 2014 22:52:32 +0100 Subject: [PATCH 4/8] fix flake8 --- beets/util/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index f1ae2793a..234b564af 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -692,4 +692,3 @@ def feat_tokens(for_artist=True): regex = r'(%s|%s)' % \ ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex) return regex - From 829b623665b2a2b7f8a8fb4d961dad1db2cfe903 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Mon, 15 Dec 2014 22:48:01 +0100 Subject: [PATCH 5/8] remove capturing parentheses --- beets/util/__init__.py | 12 ++++++------ beetsplug/ftintitle.py | 2 +- beetsplug/lyrics.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 234b564af..dd63983b5 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -683,12 +683,12 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH): def feat_tokens(for_artist=True): """Returns the tokens to use to detect featuring artists in strings.""" - FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.'] - FEAT_WORDS = ['ft', 'featuring', 'feat'] + feat_special_chars = ['&', 'feat.', 'ft.'] + feat_words = ['ft', 'featuring', 'feat'] if for_artist: # appending to artist name enables more tokens - FEAT_WORDS += ['with', 'vs', 'and', 'con'] - regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS]) + feat_words += ['with', 'vs', 'and', 'con'] + regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words]) if for_artist: - regex = r'(%s|%s)' % \ - ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex) + regex = r'%s|%s' % \ + ('|'.join([re.escape(x) for x in feat_special_chars]), regex) return regex diff --git a/beetsplug/ftintitle.py b/beetsplug/ftintitle.py index 96d1f54c9..e83836e0e 100644 --- a/beetsplug/ftintitle.py +++ b/beetsplug/ftintitle.py @@ -31,7 +31,7 @@ def split_on_feat(artist): may be a string or None if none is present. """ # split on the first "feat". - regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE) + regex = re.compile(feat_tokens(), re.IGNORECASE) parts = [s.strip() for s in regex.split(artist, 1)] if len(parts) == 1: return parts[0], None diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index a90e3d645..976aaf5c5 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -138,7 +138,7 @@ def search_pairs(item): artists = [artist] # Remove any featuring artists from the artists name - pattern = r"(.*?) %s" % feat_tokens() + pattern = r"(.*?) (%s)" % feat_tokens() match = re.search(pattern, artist, re.IGNORECASE) if match: artists.append(match.group(1)) @@ -151,7 +151,7 @@ def search_pairs(item): titles.append(match.group(1)) # Remove any featuring artists from the title - pattern = r"(.*?) %s" % feat_tokens(for_artist=False) + pattern = r"(.*?) (%s)" % feat_tokens(for_artist=False) for title in titles[:]: match = re.search(pattern, title, re.IGNORECASE) if match: From a984c1aa44c1898b5155a1ce82ab7de6a1ddd7f8 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Tue, 16 Dec 2014 11:37:40 +0000 Subject: [PATCH 6/8] Use a non-capturing group by default (#1060) Now clients don't have to decide whether they need parentheses or not. --- beets/util/__init__.py | 9 ++++++--- beetsplug/lyrics.py | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index dd63983b5..a8b13f140 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -681,8 +681,11 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH): def feat_tokens(for_artist=True): - """Returns the tokens to use to detect featuring artists in strings.""" - + """Return a regular expression that matches phrases like "featuring" + that separate a main artist or a song title from secondary artists. + The `for_artist` option determines whether the regex should be + suitable for matching artist fields (the default) or title fields. + """ feat_special_chars = ['&', 'feat.', 'ft.'] feat_words = ['ft', 'featuring', 'feat'] if for_artist: # appending to artist name enables more tokens @@ -691,4 +694,4 @@ def feat_tokens(for_artist=True): if for_artist: regex = r'%s|%s' % \ ('|'.join([re.escape(x) for x in feat_special_chars]), regex) - return regex + return '(?:{0})'.format(regex) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 976aaf5c5..462bf55c0 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -138,7 +138,7 @@ def search_pairs(item): artists = [artist] # Remove any featuring artists from the artists name - pattern = r"(.*?) (%s)" % feat_tokens() + pattern = r"(.*?) {0}".format(feat_tokens()) match = re.search(pattern, artist, re.IGNORECASE) if match: artists.append(match.group(1)) @@ -151,7 +151,7 @@ def search_pairs(item): titles.append(match.group(1)) # Remove any featuring artists from the title - pattern = r"(.*?) (%s)" % feat_tokens(for_artist=False) + pattern = r"(.*?) {0}".format(feat_tokens(for_artist=False)) for title in titles[:]: match = re.search(pattern, title, re.IGNORECASE) if match: From c2c1e7236d29505b7c11a63754a0c3f6bd4ffb1d Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Tue, 16 Dec 2014 11:49:54 +0000 Subject: [PATCH 7/8] Simplify word boundaries (#1060) Use lookahead/lookbehind matching to ensure there is whitespace around the token. Replaces the use of \b, which doesn't work for "ft.", etc. --- beets/util/__init__.py | 11 ++++------- test/test_ftintitle.py | 1 + 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index a8b13f140..188df8cc3 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -686,12 +686,9 @@ def feat_tokens(for_artist=True): The `for_artist` option determines whether the regex should be suitable for matching artist fields (the default) or title fields. """ - feat_special_chars = ['&', 'feat.', 'ft.'] feat_words = ['ft', 'featuring', 'feat'] - if for_artist: # appending to artist name enables more tokens - feat_words += ['with', 'vs', 'and', 'con'] - regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words]) if for_artist: - regex = r'%s|%s' % \ - ('|'.join([re.escape(x) for x in feat_special_chars]), regex) - return '(?:{0})'.format(regex) + feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.'] + return '(?<=\s)(?:{0})(?=\s)'.format( + '|'.join(re.escape(x) for x in feat_words) + ) diff --git a/test/test_ftintitle.py b/test/test_ftintitle.py index 2637dbaf9..77e416c5a 100644 --- a/test/test_ftintitle.py +++ b/test/test_ftintitle.py @@ -50,6 +50,7 @@ class FtInTitlePluginTest(unittest.TestCase): self.assertTrue(ftintitle.contains_feat('Alice and Bob')) self.assertTrue(ftintitle.contains_feat('Alice With Bob')) self.assertFalse(ftintitle.contains_feat('Alice defeat Bob')) + self.assertFalse(ftintitle.contains_feat('Aliceft.Bob')) def suite(): From af11dedb51efb89458dbdbc8ed5259b020ec95f2 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Tue, 16 Dec 2014 11:53:37 +0000 Subject: [PATCH 8/8] More flexible title splitting (#1060) These tokens should not be artist-exclusive. --- beets/util/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 188df8cc3..54cf423e1 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -686,9 +686,9 @@ def feat_tokens(for_artist=True): The `for_artist` option determines whether the regex should be suitable for matching artist fields (the default) or title fields. """ - feat_words = ['ft', 'featuring', 'feat'] + feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.'] if for_artist: - feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.'] + feat_words += ['with', 'vs', 'and', 'con', '&'] return '(?<=\s)(?:{0})(?=\s)'.format( '|'.join(re.escape(x) for x in feat_words) )