From 91a998df3cb5b994fcb61f43ab004b33641911cc Mon Sep 17 00:00:00 2001
From: Fabrice Laporte <kraymer@gmail.com>
Date: Sat, 13 Dec 2014 23:34:50 +0100
Subject: [PATCH 1/8] fix #1060

---
 beets/util/__init__.py | 14 ++++++++++++++
 beetsplug/ftintitle.py | 16 ++++++++--------
 beetsplug/lyrics.py    |  7 ++++---
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 529bbb2f3..56134621d 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -678,3 +678,17 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
         return min(res[9], limit)
     else:
         return limit
+
+
+def feat_tokens(extended=False):
+    """Returns the tokens to use to detect featuring artists in strings."""
+
+    FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.']
+    FEAT_WORDS = ['ft', 'featuring', 'feat']
+    if extended:
+        FEAT_WORDS += ['with', 'vs', 'and', 'con']
+    regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS])
+    if extended:
+        regex = r'(%s|%s)' % \
+            ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex)
+    return regex
diff --git a/beetsplug/ftintitle.py b/beetsplug/ftintitle.py
index 5de16f69a..3fc1badb7 100644
--- a/beetsplug/ftintitle.py
+++ b/beetsplug/ftintitle.py
@@ -16,7 +16,7 @@
 """
 from beets.plugins import BeetsPlugin
 from beets import ui
-from beets.util import displayable_path
+from beets.util import displayable_path, feat_tokens
 from beets import config
 import logging
 import re
@@ -30,24 +30,24 @@ def split_on_feat(artist):
     artist, which is always a string, and the featuring artist, which
     may be a string or None if none is present.
     """
+    # split on the first "feat".
+    feat_tokens(extended=True).strip('()')
     parts = re.split(
-        r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.|and',
-        artist,
-        1,  # Only split on the first "feat".
-    )
+        feat_tokens(extended=True).translate(None, '()'),
+        artist, 1, flags=re.IGNORECASE)
     parts = [s.strip() for s in parts]
     if len(parts) == 1:
         return parts[0], None
     else:
-        return parts
+        return tuple(parts)
 
 
 def contains_feat(title):
     """Determine whether the title contains a "featured" marker.
     """
     return bool(re.search(
-        r'[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&',
-        title,
+        feat_tokens(extended=True),
+        title, flags=re.IGNORECASE
     ))
 
 
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 73821ee22..487628ee6 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -29,6 +29,7 @@ from HTMLParser import HTMLParseError
 from beets.plugins import BeetsPlugin
 from beets import ui
 from beets import config
+from beets.util import feat_tokens
 
 
 # Global logger.
@@ -137,7 +138,7 @@ def search_pairs(item):
     artists = [artist]
 
     # Remove any featuring artists from the artists name
-    pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))"
+    pattern = r"(.*?) %s" % feat_tokens(extended=True)
     match = re.search(pattern, artist, re.IGNORECASE)
     if match:
         artists.append(match.group(1))
@@ -150,8 +151,8 @@ def search_pairs(item):
         titles.append(match.group(1))
 
     # Remove any featuring artists from the title
-    pattern = r"(.*?) \b(ft|feat(uring)?)\b"
-    for title in titles:
+    pattern = r"(.*?) %s" % feat_tokens()
+    for title in titles[:]:
         match = re.search(pattern, title, re.IGNORECASE)
         if match:
             titles.append(match.group(1))

From 82de2a55bc0f58f9bf6c2b760ae8501f8c53aed6 Mon Sep 17 00:00:00 2001
From: Fabrice Laporte <kraymer@gmail.com>
Date: Sat, 13 Dec 2014 23:35:59 +0100
Subject: [PATCH 2/8] ftintitle unit tests

---
 test/test_ftintitle.py | 59 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 test/test_ftintitle.py

diff --git a/test/test_ftintitle.py b/test/test_ftintitle.py
new file mode 100644
index 000000000..2637dbaf9
--- /dev/null
+++ b/test/test_ftintitle.py
@@ -0,0 +1,59 @@
+# This file is part of beets.
+# Copyright 2014, Fabrice Laporte.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Tests for the 'ftintitle' plugin."""
+
+from _common import unittest
+from beetsplug import ftintitle
+
+
+class FtInTitlePluginTest(unittest.TestCase):
+    def setUp(self):
+        """Set up configuration"""
+        ftintitle.FtInTitlePlugin()
+
+    def test_split_on_feat(self):
+        parts = ftintitle.split_on_feat('Alice ft. Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice feat Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice feat. Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice featuring Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice & Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice and Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice With Bob')
+        self.assertEqual(parts, ('Alice', 'Bob'))
+        parts = ftintitle.split_on_feat('Alice defeat Bob')
+        self.assertEqual(parts, ('Alice defeat Bob', None))
+
+    def test_contains_feat(self):
+        self.assertTrue(ftintitle.contains_feat('Alice ft. Bob'))
+        self.assertTrue(ftintitle.contains_feat('Alice feat. Bob'))
+        self.assertTrue(ftintitle.contains_feat('Alice feat Bob'))
+        self.assertTrue(ftintitle.contains_feat('Alice featuring Bob'))
+        self.assertTrue(ftintitle.contains_feat('Alice & Bob'))
+        self.assertTrue(ftintitle.contains_feat('Alice and Bob'))
+        self.assertTrue(ftintitle.contains_feat('Alice With Bob'))
+        self.assertFalse(ftintitle.contains_feat('Alice defeat Bob'))
+
+
+def suite():
+    return unittest.TestLoader().loadTestsFromName(__name__)
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')

From b62f15d9d911b84301483c7dc1989ded62c9873e Mon Sep 17 00:00:00 2001
From: Fabrice Laporte <kraymer@gmail.com>
Date: Sun, 14 Dec 2014 22:46:51 +0100
Subject: [PATCH 3/8] feat_tokens: change argument name, fix regex flag

---
 beets/util/__init__.py |  7 ++++---
 beetsplug/ftintitle.py | 12 +++---------
 beetsplug/lyrics.py    |  4 ++--
 3 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 56134621d..f1ae2793a 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -680,15 +680,16 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
         return limit
 
 
-def feat_tokens(extended=False):
+def feat_tokens(for_artist=True):
     """Returns the tokens to use to detect featuring artists in strings."""
 
     FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.']
     FEAT_WORDS = ['ft', 'featuring', 'feat']
-    if extended:
+    if for_artist:  # appending to artist name enables more tokens
         FEAT_WORDS += ['with', 'vs', 'and', 'con']
     regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS])
-    if extended:
+    if for_artist:
         regex = r'(%s|%s)' % \
             ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex)
     return regex
+
diff --git a/beetsplug/ftintitle.py b/beetsplug/ftintitle.py
index 3fc1badb7..96d1f54c9 100644
--- a/beetsplug/ftintitle.py
+++ b/beetsplug/ftintitle.py
@@ -31,11 +31,8 @@ def split_on_feat(artist):
     may be a string or None if none is present.
     """
     # split on the first "feat".
-    feat_tokens(extended=True).strip('()')
-    parts = re.split(
-        feat_tokens(extended=True).translate(None, '()'),
-        artist, 1, flags=re.IGNORECASE)
-    parts = [s.strip() for s in parts]
+    regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE)
+    parts = [s.strip() for s in regex.split(artist, 1)]
     if len(parts) == 1:
         return parts[0], None
     else:
@@ -45,10 +42,7 @@ def split_on_feat(artist):
 def contains_feat(title):
     """Determine whether the title contains a "featured" marker.
     """
-    return bool(re.search(
-        feat_tokens(extended=True),
-        title, flags=re.IGNORECASE
-    ))
+    return bool(re.search(feat_tokens(), title, flags=re.IGNORECASE))
 
 
 def update_metadata(item, feat_part, drop_feat):
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 487628ee6..a90e3d645 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -138,7 +138,7 @@ def search_pairs(item):
     artists = [artist]
 
     # Remove any featuring artists from the artists name
-    pattern = r"(.*?) %s" % feat_tokens(extended=True)
+    pattern = r"(.*?) %s" % feat_tokens()
     match = re.search(pattern, artist, re.IGNORECASE)
     if match:
         artists.append(match.group(1))
@@ -151,7 +151,7 @@ def search_pairs(item):
         titles.append(match.group(1))
 
     # Remove any featuring artists from the title
-    pattern = r"(.*?) %s" % feat_tokens()
+    pattern = r"(.*?) %s" % feat_tokens(for_artist=False)
     for title in titles[:]:
         match = re.search(pattern, title, re.IGNORECASE)
         if match:

From 693e994729f88daebda17242deb1b63f6c486b4d Mon Sep 17 00:00:00 2001
From: Fabrice Laporte <kraymer@gmail.com>
Date: Sun, 14 Dec 2014 22:52:32 +0100
Subject: [PATCH 4/8] fix flake8

---
 beets/util/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index f1ae2793a..234b564af 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -692,4 +692,3 @@ def feat_tokens(for_artist=True):
         regex = r'(%s|%s)' % \
             ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex)
     return regex
-

From 829b623665b2a2b7f8a8fb4d961dad1db2cfe903 Mon Sep 17 00:00:00 2001
From: Fabrice Laporte <kraymer@gmail.com>
Date: Mon, 15 Dec 2014 22:48:01 +0100
Subject: [PATCH 5/8] remove capturing parentheses

---
 beets/util/__init__.py | 12 ++++++------
 beetsplug/ftintitle.py |  2 +-
 beetsplug/lyrics.py    |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 234b564af..dd63983b5 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -683,12 +683,12 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
 def feat_tokens(for_artist=True):
     """Returns the tokens to use to detect featuring artists in strings."""
 
-    FEAT_SPECIAL_CHARS = ['&', 'feat.', 'ft.']
-    FEAT_WORDS = ['ft', 'featuring', 'feat']
+    feat_special_chars = ['&', 'feat.', 'ft.']
+    feat_words = ['ft', 'featuring', 'feat']
     if for_artist:  # appending to artist name enables more tokens
-        FEAT_WORDS += ['with', 'vs', 'and', 'con']
-    regex = r'(%s)' % '|'.join(['\\b%s\\b' % re.escape(x) for x in FEAT_WORDS])
+        feat_words += ['with', 'vs', 'and', 'con']
+    regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words])
     if for_artist:
-        regex = r'(%s|%s)' % \
-            ('|'.join([re.escape(x) for x in FEAT_SPECIAL_CHARS]), regex)
+        regex = r'%s|%s' % \
+            ('|'.join([re.escape(x) for x in feat_special_chars]), regex)
     return regex
diff --git a/beetsplug/ftintitle.py b/beetsplug/ftintitle.py
index 96d1f54c9..e83836e0e 100644
--- a/beetsplug/ftintitle.py
+++ b/beetsplug/ftintitle.py
@@ -31,7 +31,7 @@ def split_on_feat(artist):
     may be a string or None if none is present.
     """
     # split on the first "feat".
-    regex = re.compile(feat_tokens().translate(None, '()'), re.IGNORECASE)
+    regex = re.compile(feat_tokens(), re.IGNORECASE)
     parts = [s.strip() for s in regex.split(artist, 1)]
     if len(parts) == 1:
         return parts[0], None
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index a90e3d645..976aaf5c5 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -138,7 +138,7 @@ def search_pairs(item):
     artists = [artist]
 
     # Remove any featuring artists from the artists name
-    pattern = r"(.*?) %s" % feat_tokens()
+    pattern = r"(.*?) (%s)" % feat_tokens()
     match = re.search(pattern, artist, re.IGNORECASE)
     if match:
         artists.append(match.group(1))
@@ -151,7 +151,7 @@ def search_pairs(item):
         titles.append(match.group(1))
 
     # Remove any featuring artists from the title
-    pattern = r"(.*?) %s" % feat_tokens(for_artist=False)
+    pattern = r"(.*?) (%s)" % feat_tokens(for_artist=False)
     for title in titles[:]:
         match = re.search(pattern, title, re.IGNORECASE)
         if match:

From a984c1aa44c1898b5155a1ce82ab7de6a1ddd7f8 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Tue, 16 Dec 2014 11:37:40 +0000
Subject: [PATCH 6/8] Use a non-capturing group by default (#1060)

Now clients don't have to decide whether they need parentheses or not.
---
 beets/util/__init__.py | 9 ++++++---
 beetsplug/lyrics.py    | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index dd63983b5..a8b13f140 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -681,8 +681,11 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
 
 
 def feat_tokens(for_artist=True):
-    """Returns the tokens to use to detect featuring artists in strings."""
-
+    """Return a regular expression that matches phrases like "featuring"
+    that separate a main artist or a song title from secondary artists.
+    The `for_artist` option determines whether the regex should be
+    suitable for matching artist fields (the default) or title fields.
+    """
     feat_special_chars = ['&', 'feat.', 'ft.']
     feat_words = ['ft', 'featuring', 'feat']
     if for_artist:  # appending to artist name enables more tokens
@@ -691,4 +694,4 @@ def feat_tokens(for_artist=True):
     if for_artist:
         regex = r'%s|%s' % \
             ('|'.join([re.escape(x) for x in feat_special_chars]), regex)
-    return regex
+    return '(?:{0})'.format(regex)
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 976aaf5c5..462bf55c0 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -138,7 +138,7 @@ def search_pairs(item):
     artists = [artist]
 
     # Remove any featuring artists from the artists name
-    pattern = r"(.*?) (%s)" % feat_tokens()
+    pattern = r"(.*?) {0}".format(feat_tokens())
     match = re.search(pattern, artist, re.IGNORECASE)
     if match:
         artists.append(match.group(1))
@@ -151,7 +151,7 @@ def search_pairs(item):
         titles.append(match.group(1))
 
     # Remove any featuring artists from the title
-    pattern = r"(.*?) (%s)" % feat_tokens(for_artist=False)
+    pattern = r"(.*?) {0}".format(feat_tokens(for_artist=False))
     for title in titles[:]:
         match = re.search(pattern, title, re.IGNORECASE)
         if match:

From c2c1e7236d29505b7c11a63754a0c3f6bd4ffb1d Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Tue, 16 Dec 2014 11:49:54 +0000
Subject: [PATCH 7/8] Simplify word boundaries (#1060)

Use lookahead/lookbehind matching to ensure there is whitespace around the
token. Replaces the use of \b, which doesn't work for "ft.", etc.
---
 beets/util/__init__.py | 11 ++++-------
 test/test_ftintitle.py |  1 +
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index a8b13f140..188df8cc3 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -686,12 +686,9 @@ def feat_tokens(for_artist=True):
     The `for_artist` option determines whether the regex should be
     suitable for matching artist fields (the default) or title fields.
     """
-    feat_special_chars = ['&', 'feat.', 'ft.']
     feat_words = ['ft', 'featuring', 'feat']
-    if for_artist:  # appending to artist name enables more tokens
-        feat_words += ['with', 'vs', 'and', 'con']
-    regex = r'%s' % '|'.join(['\\b%s\\b' % re.escape(x) for x in feat_words])
     if for_artist:
-        regex = r'%s|%s' % \
-            ('|'.join([re.escape(x) for x in feat_special_chars]), regex)
-    return '(?:{0})'.format(regex)
+        feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.']
+    return '(?<=\s)(?:{0})(?=\s)'.format(
+        '|'.join(re.escape(x) for x in feat_words)
+    )
diff --git a/test/test_ftintitle.py b/test/test_ftintitle.py
index 2637dbaf9..77e416c5a 100644
--- a/test/test_ftintitle.py
+++ b/test/test_ftintitle.py
@@ -50,6 +50,7 @@ class FtInTitlePluginTest(unittest.TestCase):
         self.assertTrue(ftintitle.contains_feat('Alice and Bob'))
         self.assertTrue(ftintitle.contains_feat('Alice With Bob'))
         self.assertFalse(ftintitle.contains_feat('Alice defeat Bob'))
+        self.assertFalse(ftintitle.contains_feat('Aliceft.Bob'))
 
 
 def suite():

From af11dedb51efb89458dbdbc8ed5259b020ec95f2 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Tue, 16 Dec 2014 11:53:37 +0000
Subject: [PATCH 8/8] More flexible title splitting (#1060)

These tokens should not be artist-exclusive.
---
 beets/util/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 188df8cc3..54cf423e1 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -686,9 +686,9 @@ def feat_tokens(for_artist=True):
     The `for_artist` option determines whether the regex should be
     suitable for matching artist fields (the default) or title fields.
     """
-    feat_words = ['ft', 'featuring', 'feat']
+    feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.']
     if for_artist:
-        feat_words += ['with', 'vs', 'and', 'con', '&', 'feat.', 'ft.']
+        feat_words += ['with', 'vs', 'and', 'con', '&']
     return '(?<=\s)(?:{0})(?=\s)'.format(
         '|'.join(re.escape(x) for x in feat_words)
     )