From 4add189608c19ff70847c763b30f9015adc981a9 Mon Sep 17 00:00:00 2001
From: Fabrice Laporte <kraymer@gmail.com>
Date: Thu, 8 May 2014 01:23:32 +0200
Subject: [PATCH] Add option to extrapolate year buckets names

- spans are now tracked via a single list of dicts instead of the two
  lists used previously (simpler code)
- extend_year_spans() pregenerates all possible ranges at the plugin
  setup stage
- a BucketError is now raised if a declared bucket format is not accepted
---
 beetsplug/bucket.py     | 200 ++++++++++++++++++++++++++++++----------
 docs/plugins/bucket.rst |  18 +++-
 test/test_bucket.py     |  52 ++++++++---
 3 files changed, 205 insertions(+), 65 deletions(-)

diff --git a/beetsplug/bucket.py b/beetsplug/bucket.py
index 5473a4b65..9a6939601 100644
--- a/beetsplug/bucket.py
+++ b/beetsplug/bucket.py
@@ -12,37 +12,152 @@
 # The above copyright notice and this permission notice shall be
 # included in all copies or substantial portions of the Software.
 
-"""Enrich path formatting with %bucket_alpha and %bucket_date functions
+"""Provides %bucket_alpha and %bucket_year functions for path formatting.
 """
 
 from datetime import datetime
 import logging
 import re
 import string
+from itertools import tee, izip
 from beets import plugins
 
 log = logging.getLogger('beets')
 
 
-def extract_years(lst):
-    """Extract years from a list of strings"""
+class BucketError(Exception):
+    pass
 
-    def make_date(s):
-        """Convert string representing a year to int
+
+def pairwise(iterable):
+    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
+    a, b = tee(iterable)
+    next(b, None)
+    return izip(a, b)
+
+
+def span_from_str(span_str):
+    """Build a span dict from the span string representation.
+    """
+
+    def normalize_year(d, yearfrom):
+        """Convert string to a 4 digits year
         """
-        d = int(s)
-        if d < 100:  # two digits imply it is 20th century
-            d = 1900 + d
+        if yearfrom < 100:
+            raise BucketError("Bucket 'from' year %d must be expressed on 4 "
+                              "digits" % yearfrom)
+        # if two digits only, pick closest year that ends by these two
+        # digits starting from yearfrom
+        if d < 100:
+            if (d % 100) < (yearfrom % 100):
+                d = (yearfrom - yearfrom % 100) + 100 + d
+            else:
+                d = (yearfrom - yearfrom % 100) + d
         return d
 
-    res = []
-    for bucket in lst:
-        yearspan_str = re.findall('\d+', bucket)
-        yearspan = [make_date(x) for x in yearspan_str]
-        res.append(yearspan)
+    years = [int(x) for x in re.findall('\d+', span_str)]
+    years = [normalize_year(x, years[0]) for x in years]
+
+    res = {'from': years[0], 'str': span_str}
+    if len(years) > 1:
+        res['to'] = years[-1]
     return res
 
 
+def complete_year_spans(spans):
+    """Set the `to` value of spans if empty and sort them chronologically.
+    """
+    spans.sort(key=lambda x: x['from'])
+    for (x, y) in pairwise(spans):
+        if 'to' not in x:
+            x['to'] = y['from'] - 1
+    if spans and 'to' not in spans[-1]:
+        spans[-1]['to'] = datetime.now().year
+
+
+def extend_year_spans(spans, spanlen, start=1900, end=2014):
+    """Add new spans to given spans list so that every year of [start,end]
+    belongs to a span.
+    """
+    extended_spans = spans[:]
+    for (x, y) in pairwise(spans):
+        # if a gap between two spans, fill the gap with as much spans of
+        # spanlen length as necessary
+        for span_from in range(x['to'] + 1, y['from'], spanlen):
+            extended_spans.append({'from': span_from})
+    # Create spans prior to declared ones
+    for span_from in range(spans[0]['from'] - spanlen, start, -spanlen):
+        extended_spans.append({'from': span_from})
+    # Create spans after the declared ones
+    for span_from in range(spans[-1]['to'] + 1, end, spanlen):
+        extended_spans.append({'from': span_from})
+
+    complete_year_spans(extended_spans)
+    return extended_spans
+
+
+def build_year_spans(year_spans_str):
+    """Build a chronologically ordered list of spans dict from unordered spans
+    stringlist.
+    """
+    spans = []
+    for elem in year_spans_str:
+        spans.append(span_from_str(elem))
+    complete_year_spans(spans)
+    return spans
+
+
+def str2fmt(s):
+    """Deduces formatting syntax from a span string.
+    """
+    regex = re.compile("(?P<bef>\D*)(?P<fromyear>\d+)(?P<sep>\D*)"
+                       "(?P<toyear>\d*)(?P<after>\D*)")
+    m = re.match(regex, s)
+
+    def year_format(year):
+        return '%%0%dd' % len(year)
+
+    res = {'fromnchars': len(m.group('fromyear')),
+           'tonchars': len(m.group('toyear'))}
+    res['fmt'] = "%s%%s%s%s%s" % (m.group('bef'),
+                                  m.group('sep'),
+                                  '%s' if res['tonchars'] else '',
+                                  m.group('after'))
+    return res
+
+
+def format_span(fmt, yearfrom, yearto, fromnchars, tonchars):
+    """Return a span string representation.
+    """
+    args = (str(yearfrom)[-fromnchars:])
+    if tonchars:
+        args = (str(yearfrom)[-fromnchars:], str(yearto)[-tonchars:])
+    return fmt % args
+
+
+def extract_modes(spans):
+    """Extract the most common spans lengths and representation formats
+    """
+    rangelen = sorted([x['to'] - x['from'] + 1 for x in spans])
+    deflen = sorted(rangelen, key=rangelen.count)[-1]
+    reprs = [str2fmt(x['str']) for x in spans]
+    deffmt = sorted(reprs, key=reprs.count)[-1]
+    return deflen, deffmt
+
+
+def build_alpha_spans(alpha_spans_str):
+    """Extract alphanumerics from string and return sorted list of chars
+    [from...to]
+    """
+    spans = []
+    for elem in alpha_spans_str:
+        bucket = sorted([x for x in elem.lower() if x.isalnum()])
+        beginIdx = string.ascii_lowercase.index(bucket[0])
+        endIdx = string.ascii_lowercase.index(bucket[-1])
+        spans.append(string.ascii_lowercase[beginIdx:endIdx + 1])
+    return spans
+
+
 class BucketPlugin(plugins.BeetsPlugin):
     def __init__(self):
         super(BucketPlugin, self).__init__()
@@ -51,56 +166,43 @@ class BucketPlugin(plugins.BeetsPlugin):
         self.config.add({
             'bucket_year': [],
             'bucket_alpha': [],
+            'extrapolate': False
         })
         self.setup()
 
     def setup(self):
         """Setup plugin from config options
         """
-        yearranges = extract_years(self.config['bucket_year'].get())
-        self.yearbounds = sorted([y for ys in yearranges for y in ys])
-        self.yearranges = [self.make_year_range(b) for b in yearranges]
-        self.alpharanges = [self.make_alpha_range(b) for b in
-                            self.config['bucket_alpha'].get()]
+        self.year_spans = build_year_spans(self.config['bucket_year'].get())
+        if self.year_spans and self.config['extrapolate']:
+            [self.ys_len_mode,
+                self.ys_repr_mode] = extract_modes(self.year_spans)
+            self.year_spans = extend_year_spans(self.year_spans,
+                                                self.ys_len_mode)
 
-    def make_year_range(self, ys):
-        """Express year-range as a list of years [from...to].
-           If input year-range only contains the 'from' year, the 'to' is
-           defined as the 'from' year of the next year-range minus one or is
-           set to current year if there is no next year-range.
-        """
-        if len(ys) == 1:  # miss upper bound
-            lb_idx = self.yearbounds.index(ys[0])
-            try:
-                ys.append(self.yearbounds[lb_idx + 1])
-            except:
-                ys.append(datetime.now().year)
-        return range(ys[0], ys[-1] + 1)
+        self.alpha_spans = build_alpha_spans(self.config['bucket_alpha'].get())
 
-    def make_alpha_range(self, s):
-        """Extract alphanumerics from string and return sorted list of chars
-        [from...to]
-        """
-        bucket = sorted([x for x in s.lower() if x.isalnum()])
-        beginIdx = string.ascii_lowercase.index(bucket[0])
-        endIdx = string.ascii_lowercase.index(bucket[-1])
-        return string.ascii_lowercase[beginIdx:endIdx + 1]
-
-    def find_bucket_timerange(self, date):
-        """Return year-range bucket that matches given date or return the date
+    def find_bucket_year(self, year):
+        """Return  bucket that matches given year or return the year
         if no matching bucket.
         """
-        for (i, r) in enumerate(self.yearranges):
-            if int(date) in r:
-                return self.config['bucket_year'].get()[i]
-        return date
+        for ys in self.year_spans:
+            if ys['from'] <= int(year) <= ys['to']:
+                if 'str' in ys:
+                    return ys['str']
+                else:
+                    return format_span(self.ys_repr_mode['fmt'],
+                                       ys['from'], ys['to'],
+                                       self.ys_repr_mode['fromnchars'],
+                                       self.ys_repr_mode['tonchars'])
+        return year
 
     def find_bucket_alpha(self, s):
         """Return alpha-range bucket that matches given string or return the
         string initial if no matching bucket.
         """
-        for (i, r) in enumerate(self.alpharanges):
-            if s.lower()[0] in r:
+        for (i, span) in enumerate(self.alpha_spans):
+            if s.lower()[0] in span:
                 return self.config['bucket_alpha'].get()[i]
         return s[0].upper()
 
@@ -109,7 +211,7 @@ class BucketPlugin(plugins.BeetsPlugin):
             field = 'year'
 
         if field == 'year':
-            func = self.find_bucket_timerange
+            func = self.find_bucket_year
         else:
             func = self.find_bucket_alpha
         return func(text)
diff --git a/docs/plugins/bucket.rst b/docs/plugins/bucket.rst
index 0302ed005..ac1291aa8 100644
--- a/docs/plugins/bucket.rst
+++ b/docs/plugins/bucket.rst
@@ -5,8 +5,8 @@ The ``bucket`` plugin helps you keep a balanced files tree for your library
 by grouping your files into buckets folders representing ranges.
 This kind of files organization is usually used to classify your music by
 periods (eg *1960s*, *1970s* etc), or to divide bloated folders into smaller
-subfolders by grouping albums/artist alphabetically (eg *A-F*, *G-M*, *N-Z*).
-To use plugin, enable it by including ``bucket`` into ``plugins`` line of your
+subfolders by grouping albums/artists alphabetically (eg *A-F*, *G-M*, *N-Z*).
+To use this plugin, enable it by including ``bucket`` in the ``plugins`` line of your
 beets config. The plugin provides a template function called ``%bucket`` for
 use in path format expressions::
 
@@ -26,5 +26,17 @@ The ``bucket_year`` parameter is used for all substitutions occuring on the
 The definition of a range is somewhat loose, and multiple formats are allowed :
 
 - for alpha ranges: the range is defined by the lowest and highest (ascii-wise) alphanumeric characters. eg *'ABCD'*, *'A-D'*, *'A->D'*, *[AD]* are equivalent.
-- for year ranges: digits characters are extracted, and in case of doubt XXth century is assumed. eg *'1975-77'*, *'1975,76,77'* and *'1975-1977'* are equivalent. If no upper bound is given, the range is extended to current year (unless a later range is defined). eg *'1975'* encompasses all years from 1975 until now.
+- for year ranges: digit characters are extracted and the two extreme years define the range. eg *'1975-77'*, *'1975,76,77'* and *'1975-1977'* are equivalent. If no upper bound is given, the range is extended to the current year (unless a later range is defined). eg *'1975'* encompasses all years from 1975 until now.
+
+If you want to group your files into many small year ranges, you don't have to
+enumerate them all in the ``bucket_year`` parameter but can activate the ``extrapolate``
+option instead. This option generates year bucket names by reproducing the characteristics
+of the declared buckets. For instance, this configuration::
+
+    bucket:
+        bucket_year: ['2000-05']
+        extrapolate: true
+
+is enough to make the plugin return a six-year range for any input year.
+
 
diff --git a/test/test_bucket.py b/test/test_bucket.py
index b76bd973d..2b4d546c8 100644
--- a/test/test_bucket.py
+++ b/test/test_bucket.py
@@ -29,46 +29,72 @@ class BucketPluginTest(unittest.TestCase, TestHelper):
     def tearDown(self):
         self.teardown_beets()
 
-    def _setup_config(self, bucket_year=[], bucket_alpha=[]):
+    def _setup_config(self, bucket_year=[], bucket_alpha=[],
+                      extrapolate=False):
         config['bucket']['bucket_year'] = bucket_year
         config['bucket']['bucket_alpha'] = bucket_alpha
+        config['bucket']['extrapolate'] = extrapolate
         self.plugin.setup()
 
     def test_year_single_year(self):
-        """If a single year is given, folder represents a range from this year
-        to the next 'from year' of next folder."""
-        self._setup_config(bucket_year=['50', '70'])
-
-        self.assertEqual(self.plugin._tmpl_bucket('1959'), '50')
-        self.assertEqual(self.plugin._tmpl_bucket('1969'), '50')
+        """If a single year is given, range starts from this year and stops at
+        the year preceding the one of next bucket."""
+        self._setup_config(bucket_year=['1950s', '1970s'])
+        self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950s')
+        self.assertEqual(self.plugin._tmpl_bucket('1969'), '1950s')
 
     def test_year_single_year_last_folder(self):
-        """Last folder of a range extends from its year to current year."""
-        self._setup_config(bucket_year=['50', '70'])
-        self.assertEqual(self.plugin._tmpl_bucket('2014'), '70')
+        """If a single year is given for the last bucket, extend it to current
+        year."""
+        self._setup_config(bucket_year=['1950', '1970'])
+        self.assertEqual(self.plugin._tmpl_bucket('2014'), '1970')
         self.assertEqual(self.plugin._tmpl_bucket('2015'), '2015')
 
     def test_year_two_years(self):
-        self._setup_config(bucket_year=['50-59', '1960-69'])
-        self.assertEqual(self.plugin._tmpl_bucket('1959'), '50-59')
+        """Buckets can be named with the 'from-to' syntax."""
+        self._setup_config(bucket_year=['1950-59', '1960-1969'])
+        self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950-59')
+        self.assertEqual(self.plugin._tmpl_bucket('1969'), '1960-1969')
 
     def test_year_multiple_years(self):
+        """Buckets can be named by listing all the years"""
         self._setup_config(bucket_year=['1950,51,52,53'])
         self.assertEqual(self.plugin._tmpl_bucket('1953'), '1950,51,52,53')
         self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
 
     def test_year_out_of_range(self):
         """If no range match, return the year"""
-        self._setup_config(bucket_year=['50-59', '1960-69'])
+        self._setup_config(bucket_year=['1950-59', '1960-69'])
         self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
         self._setup_config(bucket_year=[])
         self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
 
+    def test_year_out_of_range_extrapolate(self):
+        """If no defined range match, extrapolate all ranges using the most
+        common syntax amongst existing buckets and return the matching one."""
+        self._setup_config(bucket_year=['1950-59', '1960-69'],
+                           extrapolate=True)
+        self.assertEqual(self.plugin._tmpl_bucket('1914'), '1910-19')
+        # pick single year format
+        self._setup_config(bucket_year=['1962-81', '2002', '2012'],
+                           extrapolate=True)
+        self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982')
+        # pick from-end format
+        self._setup_config(bucket_year=['1962-81', '2002', '2012-14'],
+                           extrapolate=True)
+        self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982-01')
+        # extrapolate add ranges, but never modifies existing ones
+        self._setup_config(bucket_year=['1932', '1942', '1952', '1962-81',
+                                        '2002'], extrapolate=True)
+        self.assertEqual(self.plugin._tmpl_bucket('1975'), '1962-81')
+
     def test_alpha_all_chars(self):
+        """Alphabet buckets can be named by listing all their chars"""
         self._setup_config(bucket_alpha=['ABCD', 'FGH', 'IJKL'])
         self.assertEqual(self.plugin._tmpl_bucket('garry'), 'FGH')
 
     def test_alpha_first_last_chars(self):
+        """Alphabet buckets can be named by listing the 'from-to' syntax"""
         self._setup_config(bucket_alpha=['A-D', 'F-H', 'I-Z'])
         self.assertEqual(self.plugin._tmpl_bucket('garry'), 'F-H')