mirror of
https://github.com/beetbox/beets.git
synced 2026-01-05 23:43:31 +01:00
Add option to extrapolate year buckets names
- spans are now tracked via a list of dicts instead of 2 lists previously (simpler code) - extend_year_spans() pregenerates all possible ranges at plugin setup stage - a BucketError is now raised if declared bucket format not accepted
This commit is contained in:
parent
509af59d4e
commit
4add189608
3 changed files with 205 additions and 65 deletions
|
|
@ -12,37 +12,152 @@
|
|||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
|
||||
"""Enrich path formatting with %bucket_alpha and %bucket_date functions
|
||||
"""Provides %bucket_alpha and %bucket_year functions for path formatting.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import re
|
||||
import string
|
||||
from itertools import tee, izip
|
||||
from beets import plugins
|
||||
|
||||
log = logging.getLogger('beets')
|
||||
|
||||
|
||||
def extract_years(lst):
|
||||
"""Extract years from a list of strings"""
|
||||
class BucketError(Exception):
|
||||
pass
|
||||
|
||||
def make_date(s):
|
||||
"""Convert string representing a year to int
|
||||
|
||||
def pairwise(iterable):
    """Return the consecutive overlapping pairs of `iterable`.

    s -> (s0, s1), (s1, s2), (s2, s3), ...

    An iterable holding fewer than two items produces no pair.
    """
    a, b = tee(iterable)
    # Advance the second copy by one item so that zipping both copies
    # pairs every element with its successor.
    next(b, None)
    # The builtin `zip` is used rather than `itertools.izip`, which only
    # exists on Python 2; the pairs produced are the same.
    return zip(a, b)
|
||||
|
||||
|
||||
def span_from_str(span_str):
|
||||
"""Build a span dict from the span string representation.
|
||||
"""
|
||||
|
||||
def normalize_year(d, yearfrom):
|
||||
"""Convert string to a 4 digits year
|
||||
"""
|
||||
d = int(s)
|
||||
if d < 100: # two digits imply it is 20th century
|
||||
d = 1900 + d
|
||||
if yearfrom < 100:
|
||||
raise BucketError("Bucket 'from' year %d must be expressed on 4 "
|
||||
"digits" % yearfrom)
|
||||
# if two digits only, pick closest year that ends by these two
|
||||
# digits starting from yearfrom
|
||||
if d < 100:
|
||||
if (d % 100) < (yearfrom % 100):
|
||||
d = (yearfrom - yearfrom % 100) + 100 + d
|
||||
else:
|
||||
d = (yearfrom - yearfrom % 100) + d
|
||||
return d
|
||||
|
||||
res = []
|
||||
for bucket in lst:
|
||||
yearspan_str = re.findall('\d+', bucket)
|
||||
yearspan = [make_date(x) for x in yearspan_str]
|
||||
res.append(yearspan)
|
||||
years = [int(x) for x in re.findall('\d+', span_str)]
|
||||
years = [normalize_year(x, years[0]) for x in years]
|
||||
|
||||
res = {'from': years[0], 'str': span_str}
|
||||
if len(years) > 1:
|
||||
res['to'] = years[-1]
|
||||
return res
|
||||
|
||||
|
||||
def complete_year_spans(spans):
    """Sort `spans` chronologically in place and fill in missing `to` years.

    A span without a `to` key ends the year before the following span
    starts. If the last span has no `to` key, it is extended up to the
    current year. The list is modified in place and nothing is returned.
    """
    spans.sort(key=lambda span: span['from'])
    for current, following in pairwise(spans):
        if 'to' in current:
            continue
        current['to'] = following['from'] - 1
    if not spans:
        return
    last = spans[-1]
    if 'to' not in last:
        last['to'] = datetime.now().year
|
||||
|
||||
|
||||
def extend_year_spans(spans, spanlen, start=1900, end=2014):
    """Return a copy of `spans` extended with generated spans.

    `spans` is expected to be a chronologically ordered list of span dicts
    that all carry both `from` and `to` keys. New spans of `spanlen` years
    are generated:

    - in every gap between two consecutive declared spans,
    - before the first declared span, stepping backwards while the
      generated `from` year stays strictly greater than `start`,
    - after the last declared span, stepping forwards while the generated
      `from` year stays strictly lower than `end`.

    Generated spans only hold a `from` key when created; their `to` key is
    then set by `complete_year_spans`, which also sorts the result.
    The input list itself is not modified. An empty `spans` list yields an
    empty list.
    """
    # Nothing to extrapolate from: avoid indexing an empty list below.
    if not spans:
        return []

    extended_spans = spans[:]
    for (x, y) in pairwise(spans):
        # if a gap between two spans, fill the gap with as much spans of
        # spanlen length as necessary
        for span_from in range(x['to'] + 1, y['from'], spanlen):
            extended_spans.append({'from': span_from})
    # Create spans prior to declared ones
    for span_from in range(spans[0]['from'] - spanlen, start, -spanlen):
        extended_spans.append({'from': span_from})
    # Create spans after the declared ones
    for span_from in range(spans[-1]['to'] + 1, end, spanlen):
        extended_spans.append({'from': span_from})

    complete_year_spans(extended_spans)
    return extended_spans
|
||||
|
||||
|
||||
def build_year_spans(year_spans_str):
    """Turn an unordered list of span strings into ordered span dicts.

    Each string is converted with `span_from_str`; the resulting list is
    then sorted and completed in place by `complete_year_spans` before being
    returned.
    """
    spans = [span_from_str(span_str) for span_str in year_spans_str]
    complete_year_spans(spans)
    return spans
|
||||
|
||||
|
||||
def str2fmt(s):
    """Deduce the formatting syntax from a span string.

    The string is split into: leading text, 'from' year digits, separator,
    optional 'to' year digits and trailing text.

    Returns a dict with the keys:
    - `fromnchars`: number of digits used for the 'from' year,
    - `tonchars`: number of digits used for the 'to' year (0 if absent),
    - `fmt`: a %-format string with one `%s` placeholder for the 'from'
      year and, only when a 'to' year is present, a second one for it.

    Raises BucketError if `s` does not contain any digit.
    """
    # Raw strings keep `\D` and `\d` as regex escapes instead of relying on
    # Python leaving unknown string escapes untouched.
    regex = re.compile(r"(?P<bef>\D*)(?P<fromyear>\d+)(?P<sep>\D*)"
                       r"(?P<toyear>\d*)(?P<after>\D*)")
    m = regex.match(s)
    if m is None:
        raise BucketError("Bucket '%s' does not contain any year" % s)

    res = {'fromnchars': len(m.group('fromyear')),
           'tonchars': len(m.group('toyear'))}
    # `%%s` survives this first formatting pass as the 'from' placeholder.
    res['fmt'] = "%s%%s%s%s%s" % (m.group('bef'),
                                  m.group('sep'),
                                  '%s' if res['tonchars'] else '',
                                  m.group('after'))
    return res
|
||||
|
||||
|
||||
def format_span(fmt, yearfrom, yearto, fromnchars, tonchars):
    """Return a span string representation.

    `fmt` is a %-format string holding one `%s` placeholder for the 'from'
    year and, when `tonchars` is non-zero, a second one for the 'to' year.
    Each year is rendered with its last `fromnchars` / `tonchars` digits.
    """
    # Build a real tuple: a parenthesized single string is not a tuple.
    args = (str(yearfrom)[-fromnchars:],)
    if tonchars:
        args += (str(yearto)[-tonchars:],)
    return fmt % args
|
||||
|
||||
|
||||
def extract_modes(spans):
    """Return the most common span length and representation format.

    The length of a span is `to - from + 1`; its format is the dict
    returned by `str2fmt` for its `str` value. When several values are
    equally frequent, the last one after a stable sort by frequency wins:
    the largest length, and the format of the latest such span in `spans`.
    """
    lengths = sorted(span['to'] - span['from'] + 1 for span in spans)
    by_frequency = sorted(lengths, key=lengths.count)
    most_common_len = by_frequency[-1]

    formats = [str2fmt(span['str']) for span in spans]
    most_common_fmt = sorted(formats, key=formats.count)[-1]
    return most_common_len, most_common_fmt
|
||||
|
||||
|
||||
def build_alpha_spans(alpha_spans_str):
    """Convert alpha bucket strings into strings of consecutive letters.

    For each bucket string, the lowest and highest ASCII letters it contains
    (case-insensitively) delimit the span; the span is the run of lowercase
    letters going from one to the other, e.g. 'A-D' gives 'abcd'. Any other
    character (digits, punctuation, non-ASCII letters) is ignored.

    Returns a list with one span per input string, in the same order.
    Raises BucketError if a bucket string holds no ASCII letter.
    """
    spans = []
    for elem in alpha_spans_str:
        # Only characters that can be located in ascii_lowercase are kept,
        # so the index() lookups below cannot fail.
        bucket = sorted(c for c in elem.lower()
                        if c in string.ascii_lowercase)
        if not bucket:
            raise BucketError("Bucket '%s' does not contain any letter"
                              % elem)
        beginIdx = string.ascii_lowercase.index(bucket[0])
        endIdx = string.ascii_lowercase.index(bucket[-1])
        spans.append(string.ascii_lowercase[beginIdx:endIdx + 1])
    return spans
|
||||
|
||||
|
||||
class BucketPlugin(plugins.BeetsPlugin):
|
||||
def __init__(self):
|
||||
super(BucketPlugin, self).__init__()
|
||||
|
|
@ -51,56 +166,43 @@ class BucketPlugin(plugins.BeetsPlugin):
|
|||
self.config.add({
|
||||
'bucket_year': [],
|
||||
'bucket_alpha': [],
|
||||
'extrapolate': False
|
||||
})
|
||||
self.setup()
|
||||
|
||||
def setup(self):
|
||||
"""Setup plugin from config options
|
||||
"""
|
||||
yearranges = extract_years(self.config['bucket_year'].get())
|
||||
self.yearbounds = sorted([y for ys in yearranges for y in ys])
|
||||
self.yearranges = [self.make_year_range(b) for b in yearranges]
|
||||
self.alpharanges = [self.make_alpha_range(b) for b in
|
||||
self.config['bucket_alpha'].get()]
|
||||
self.year_spans = build_year_spans(self.config['bucket_year'].get())
|
||||
if self.year_spans and self.config['extrapolate']:
|
||||
[self.ys_len_mode,
|
||||
self.ys_repr_mode] = extract_modes(self.year_spans)
|
||||
self.year_spans = extend_year_spans(self.year_spans,
|
||||
self.ys_len_mode)
|
||||
|
||||
def make_year_range(self, ys):
|
||||
"""Express year-range as a list of years [from...to].
|
||||
If input year-range only contains the 'from' year, the 'to' is
|
||||
defined as the 'from' year of the next year-range minus one or is
|
||||
set to current year if there is no next year-range.
|
||||
"""
|
||||
if len(ys) == 1: # miss upper bound
|
||||
lb_idx = self.yearbounds.index(ys[0])
|
||||
try:
|
||||
ys.append(self.yearbounds[lb_idx + 1])
|
||||
except:
|
||||
ys.append(datetime.now().year)
|
||||
return range(ys[0], ys[-1] + 1)
|
||||
self.alpha_spans = build_alpha_spans(self.config['bucket_alpha'].get())
|
||||
|
||||
def make_alpha_range(self, s):
|
||||
"""Extract alphanumerics from string and return sorted list of chars
|
||||
[from...to]
|
||||
"""
|
||||
bucket = sorted([x for x in s.lower() if x.isalnum()])
|
||||
beginIdx = string.ascii_lowercase.index(bucket[0])
|
||||
endIdx = string.ascii_lowercase.index(bucket[-1])
|
||||
return string.ascii_lowercase[beginIdx:endIdx + 1]
|
||||
|
||||
def find_bucket_timerange(self, date):
|
||||
"""Return year-range bucket that matches given date or return the date
|
||||
def find_bucket_year(self, year):
|
||||
"""Return bucket that matches given year or return the year
|
||||
if no matching bucket.
|
||||
"""
|
||||
for (i, r) in enumerate(self.yearranges):
|
||||
if int(date) in r:
|
||||
return self.config['bucket_year'].get()[i]
|
||||
return date
|
||||
for ys in self.year_spans:
|
||||
if ys['from'] <= int(year) <= ys['to']:
|
||||
if 'str' in ys:
|
||||
return ys['str']
|
||||
else:
|
||||
return format_span(self.ys_repr_mode['fmt'],
|
||||
ys['from'], ys['to'],
|
||||
self.ys_repr_mode['fromnchars'],
|
||||
self.ys_repr_mode['tonchars'])
|
||||
return year
|
||||
|
||||
def find_bucket_alpha(self, s):
|
||||
"""Return alpha-range bucket that matches given string or return the
|
||||
string initial if no matching bucket.
|
||||
"""
|
||||
for (i, r) in enumerate(self.alpharanges):
|
||||
if s.lower()[0] in r:
|
||||
for (i, span) in enumerate(self.alpha_spans):
|
||||
if s.lower()[0] in span:
|
||||
return self.config['bucket_alpha'].get()[i]
|
||||
return s[0].upper()
|
||||
|
||||
|
|
@ -109,7 +211,7 @@ class BucketPlugin(plugins.BeetsPlugin):
|
|||
field = 'year'
|
||||
|
||||
if field == 'year':
|
||||
func = self.find_bucket_timerange
|
||||
func = self.find_bucket_year
|
||||
else:
|
||||
func = self.find_bucket_alpha
|
||||
return func(text)
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ The ``bucket`` plugin helps you keep a balanced files tree for your library
|
|||
by grouping your files into buckets folders representing ranges.
|
||||
This kind of files organization is usually used to classify your music by
|
||||
periods (eg *1960s*, *1970s* etc), or to divide bloated folders into smaller
|
||||
subfolders by grouping albums/artist alphabetically (eg *A-F*, *G-M*, *N-Z*).
|
||||
To use plugin, enable it by including ``bucket`` into ``plugins`` line of your
|
||||
subfolders by grouping albums/artists alphabetically (eg *A-F*, *G-M*, *N-Z*).
|
||||
To use this plugin, enable it by including ``bucket`` in the ``plugins`` line of your
|
||||
beets config. The plugin provides a template function called ``%bucket`` for
|
||||
use in path format expressions::
|
||||
|
||||
|
|
@ -26,5 +26,17 @@ The ``bucket_year`` parameter is used for all substitutions occuring on the
|
|||
The definition of a range is somewhat loose, and multiple formats are allowed :
|
||||
|
||||
- for alpha ranges: the range is defined by the lowest and highest (ascii-wise) alphanumeric characters. eg *'ABCD'*, *'A-D'*, *'A->D'*, *[AD]* are equivalent.
|
||||
- for year ranges: digits characters are extracted, and in case of doubt XXth century is assumed. eg *'1975-77'*, *'1975,76,77'* and *'1975-1977'* are equivalent. If no upper bound is given, the range is extended to current year (unless a later range is defined). eg *'1975'* encompasses all years from 1975 until now.
|
||||
- for year ranges: digit characters are extracted and the two extreme years define the range, e.g. *'1975-77'*, *'1975,76,77'* and *'1975-1977'* are equivalent. If no upper bound is given, the range is extended to the current year (unless a later range is defined), e.g. *'1975'* encompasses all years from 1975 until now.
|
||||
|
||||
If you want to group your files into many small year ranges, you don't have to
|
||||
enumerate them all in the ``bucket_year`` parameter but can activate the ``extrapolate``
|
||||
option instead. This option will generate year bucket names by reproducing characteristics
|
||||
of declared buckets.
|
||||
|
||||
bucket:
|
||||
bucket_year: ['2000-05']
|
||||
extrapolate: true
|
||||
|
||||
is enough to make the plugin return a five years range for any input year.
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,46 +29,72 @@ class BucketPluginTest(unittest.TestCase, TestHelper):
|
|||
def tearDown(self):
|
||||
self.teardown_beets()
|
||||
|
||||
def _setup_config(self, bucket_year=[], bucket_alpha=[]):
|
||||
def _setup_config(self, bucket_year=[], bucket_alpha=[],
|
||||
extrapolate=False):
|
||||
config['bucket']['bucket_year'] = bucket_year
|
||||
config['bucket']['bucket_alpha'] = bucket_alpha
|
||||
config['bucket']['extrapolate'] = extrapolate
|
||||
self.plugin.setup()
|
||||
|
||||
def test_year_single_year(self):
|
||||
"""If a single year is given, folder represents a range from this year
|
||||
to the next 'from year' of next folder."""
|
||||
self._setup_config(bucket_year=['50', '70'])
|
||||
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1959'), '50')
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1969'), '50')
|
||||
"""If a single year is given, range starts from this year and stops at
|
||||
the year preceding the one of next bucket."""
|
||||
self._setup_config(bucket_year=['1950s', '1970s'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950s')
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1969'), '1950s')
|
||||
|
||||
def test_year_single_year_last_folder(self):
|
||||
"""Last folder of a range extends from its year to current year."""
|
||||
self._setup_config(bucket_year=['50', '70'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('2014'), '70')
|
||||
"""If a single year is given for the last bucket, extend it to current
|
||||
year."""
|
||||
self._setup_config(bucket_year=['1950', '1970'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('2014'), '1970')
|
||||
self.assertEqual(self.plugin._tmpl_bucket('2015'), '2015')
|
||||
|
||||
def test_year_two_years(self):
|
||||
self._setup_config(bucket_year=['50-59', '1960-69'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1959'), '50-59')
|
||||
"""Buckets can be named with the 'from-to' syntax."""
|
||||
self._setup_config(bucket_year=['1950-59', '1960-1969'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950-59')
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1969'), '1960-1969')
|
||||
|
||||
def test_year_multiple_years(self):
|
||||
"""Buckets can be named by listing all the years"""
|
||||
self._setup_config(bucket_year=['1950,51,52,53'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1953'), '1950,51,52,53')
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
|
||||
|
||||
def test_year_out_of_range(self):
|
||||
"""If no range match, return the year"""
|
||||
self._setup_config(bucket_year=['50-59', '1960-69'])
|
||||
self._setup_config(bucket_year=['1950-59', '1960-69'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
|
||||
self._setup_config(bucket_year=[])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
|
||||
|
||||
def test_year_out_of_range_extrapolate(self):
|
||||
"""If no defined range match, extrapolate all ranges using the most
|
||||
common syntax amongst existing buckets and return the matching one."""
|
||||
self._setup_config(bucket_year=['1950-59', '1960-69'],
|
||||
extrapolate=True)
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1914'), '1910-19')
|
||||
# pick single year format
|
||||
self._setup_config(bucket_year=['1962-81', '2002', '2012'],
|
||||
extrapolate=True)
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982')
|
||||
# pick from-end format
|
||||
self._setup_config(bucket_year=['1962-81', '2002', '2012-14'],
|
||||
extrapolate=True)
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982-01')
|
||||
# extrapolate add ranges, but never modifies existing ones
|
||||
self._setup_config(bucket_year=['1932', '1942', '1952', '1962-81',
|
||||
'2002'], extrapolate=True)
|
||||
self.assertEqual(self.plugin._tmpl_bucket('1975'), '1962-81')
|
||||
|
||||
def test_alpha_all_chars(self):
|
||||
"""Alphabet buckets can be named by listing all their chars"""
|
||||
self._setup_config(bucket_alpha=['ABCD', 'FGH', 'IJKL'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('garry'), 'FGH')
|
||||
|
||||
def test_alpha_first_last_chars(self):
|
||||
"""Alphabet buckets can be named by listing the 'from-to' syntax"""
|
||||
self._setup_config(bucket_alpha=['A-D', 'F-H', 'I-Z'])
|
||||
self.assertEqual(self.plugin._tmpl_bucket('garry'), 'F-H')
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue