Merge pull request #747 from KraYmer/issue740_buckets

Issue740: introduce bucket plugin to organize files in bucket directories
This commit is contained in:
Fabrice L. 2014-05-10 23:04:28 +02:00
commit c0f7e1fec4
4 changed files with 411 additions and 0 deletions

230
beetsplug/bucket.py Normal file
View file

@ -0,0 +1,230 @@
# This file is part of beets.
# Copyright 2014, Fabrice Laporte.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Provides %bucket_alpha and %bucket_year functions for path formatting.
"""
from datetime import datetime
import logging
import re
import string
from itertools import tee, izip
from beets import plugins, ui
# Logger registered under the name 'beets'.
log = logging.getLogger('beets')
class BucketError(Exception):
    """Raised when a bucket range definition cannot be interpreted."""
def pairwise(iterable):
    """Yield overlapping pairs of consecutive items.

    s -> (s0,s1), (s1,s2), (s2, s3), ...

    Nothing is yielded when `iterable` holds fewer than two items.
    Implemented as a plain generator so that it does not depend on
    ``itertools.izip``, which only exists on Python 2.
    """
    # Private marker telling "no previous item yet" apart from any real
    # value (including None) found in the iterable.
    missing = object()
    previous = missing
    for current in iterable:
        if previous is not missing:
            yield previous, current
        previous = current
def span_from_str(span_str):
    """Build a span dict from the span string representation.

    Every run of digits found in `span_str` is read as a year. The first
    one is the start of the span and must not be below 100; the following
    ones may be abbreviated to their last digits and are then expanded
    relative to the first year.

    Returns a dict with the keys 'from' (first year) and 'str' (the
    original string), plus 'to' (last year) when more than one year was
    found.

    Raises ui.UserError if no year is found or if the first year is
    below 100.
    """
    def normalize_year(d, yearfrom):
        """Convert string to a 4 digits year
        """
        if yearfrom < 100:
            raise BucketError("%d must be expressed on 4 digits" % yearfrom)

        # if two digits only, pick closest year that ends by these two
        # digits starting from yearfrom
        if d < 100:
            if (d % 100) < (yearfrom % 100):
                # Abbreviated year is "smaller" than the start year: it
                # belongs to the next century (e.g. 1998-02 -> 2002).
                d = (yearfrom - yearfrom % 100) + 100 + d
            else:
                d = (yearfrom - yearfrom % 100) + d
        return d

    # Raw string: '\d' in a regular string literal is an invalid escape.
    years = [int(x) for x in re.findall(r'\d+', span_str)]
    if not years:
        raise ui.UserError("invalid range defined for year bucket '%s': no "
                           "year found" % span_str)
    try:
        years = [normalize_year(x, years[0]) for x in years]
    except BucketError as exc:
        raise ui.UserError("invalid range defined for year bucket '%s': %s" %
                           (span_str, exc))

    res = {'from': years[0], 'str': span_str}
    if len(years) > 1:
        res['to'] = years[-1]
    return res
def complete_year_spans(spans):
    """Sort `spans` chronologically and fill in every missing `to` year.

    The list and its dicts are modified in place. A span without `to` ends
    the year before the next span starts; the last span, if it has no
    `to`, ends at the current year.
    """
    spans.sort(key=lambda span: span['from'])

    # Walk each span together with the one that follows it.
    for current, following in zip(spans, spans[1:]):
        if 'to' not in current:
            current['to'] = following['from'] - 1

    if not spans:
        return
    last = spans[-1]
    if 'to' not in last:
        last['to'] = datetime.now().year
def extend_year_spans(spans, spanlen, start=1900, end=2014):
    """Add new spans to given spans list so that every year of [start,end]
    belongs to a span.

    `spans` is read as a chronologically ordered list of dicts that all
    carry 'from' and 'to' keys; it is left untouched and a new list is
    returned. New spans are created every `spanlen` years in the gaps
    between declared spans, before the first one (stepping back while the
    start year stays above `start`) and after the last one (while the
    start year stays below `end`). An empty `spans` list yields an empty
    list, since there is nothing to extrapolate from.
    """
    if not spans:
        # spans[0] / spans[-1] below would raise IndexError.
        return []

    extended_spans = spans[:]
    for (x, y) in pairwise(spans):
        # if a gap between two spans, fill the gap with as much spans of
        # spanlen length as necessary
        for span_from in range(x['to'] + 1, y['from'], spanlen):
            extended_spans.append({'from': span_from})
    # Create spans prior to declared ones
    for span_from in range(spans[0]['from'] - spanlen, start, -spanlen):
        extended_spans.append({'from': span_from})
    # Create spans after the declared ones
    for span_from in range(spans[-1]['to'] + 1, end, spanlen):
        extended_spans.append({'from': span_from})

    # The added spans only have a 'from' year: sort everything and let
    # complete_year_spans derive their 'to' year.
    complete_year_spans(extended_spans)
    return extended_spans
def build_year_spans(year_spans_str):
    """Turn a list of span strings into a list of span dicts.

    Each string is parsed with span_from_str, then the resulting list is
    passed to complete_year_spans before being returned.
    """
    parsed = [span_from_str(text) for text in year_spans_str]
    complete_year_spans(parsed)
    return parsed
def str2fmt(s):
    """Deduces formatting syntax from a span string.

    `s` is split into: leading text, first run of digits (start year),
    separator, optional second run of digits (end year) and the non-digit
    text that follows.

    Returns a dict with:
      - 'fromnchars': number of digits of the start year
      - 'tonchars': number of digits of the end year (0 if there is none)
      - 'fmt': a %-format string with one '%s' placeholder for the start
        year and, if an end year was found, a second one for the end year

    NOTE: `s` is expected to contain at least one digit; otherwise the
    pattern does not match and an AttributeError is raised.
    """
    regex = re.compile(r"(?P<bef>\D*)(?P<fromyear>\d+)(?P<sep>\D*)"
                       r"(?P<toyear>\d*)(?P<after>\D*)")
    m = regex.match(s)

    def literal(group):
        # Text copied from the bucket name ends up in a %-format string:
        # a literal '%' must be doubled or formatting would fail.
        return m.group(group).replace('%', '%%')

    res = {'fromnchars': len(m.group('fromyear')),
           'tonchars': len(m.group('toyear'))}
    res['fmt'] = "%s%%s%s%s%s" % (literal('bef'),
                                  literal('sep'),
                                  '%s' if res['tonchars'] else '',
                                  literal('after'))
    return res
def format_span(fmt, yearfrom, yearto, fromnchars, tonchars):
    """Render a year span with the %-format string `fmt`.

    The start year is cut down to its last `fromnchars` characters. When
    `tonchars` is non-zero, the end year is cut down to its last
    `tonchars` characters and passed as a second argument to `fmt`.
    """
    from_part = str(yearfrom)[-fromnchars:]
    if not tonchars:
        # Format has a single placeholder: only the start year is shown.
        return fmt % from_part
    to_part = str(yearto)[-tonchars:]
    return fmt % (from_part, to_part)
def extract_modes(spans):
    """Return the most common span length and representation format.

    The result is a `(length, format)` pair, where `length` is the most
    frequent number of years covered by a span and `format` the most
    frequent dict returned by str2fmt for the spans' 'str' values.
    """
    def most_frequent(values):
        # Stable sort on frequency: among equally frequent values, the one
        # placed last in `values` is picked.
        return sorted(values, key=values.count)[-1]

    lengths = sorted([span['to'] - span['from'] + 1 for span in spans])
    mode_length = most_frequent(lengths)
    formats = [str2fmt(span['str']) for span in spans]
    mode_format = most_frequent(formats)
    return mode_length, mode_format
def build_alpha_spans(alpha_spans_str):
    """Extract alphanumerics from string and return sorted list of chars
    [from...to]

    For each bucket string, the lowest and highest characters among its
    digits and (lowercased) ASCII letters delimit a range; the returned
    list holds, for each bucket, the string of all characters of
    '0-9a-z' lying in that range. Any other character, including
    non-ASCII letters, is ignored.

    Raises ui.UserError if a bucket string holds no usable character.
    """
    ASCII_DIGITS = string.digits + string.ascii_lowercase
    spans = []
    for elem in alpha_spans_str:
        # Keep only characters that can be looked up in ASCII_DIGITS:
        # str.isalnum() also accepts non-ASCII letters, for which
        # ASCII_DIGITS.index() would raise a ValueError.
        bucket = sorted([x for x in elem.lower() if x in ASCII_DIGITS])
        if not bucket:
            raise ui.UserError("invalid range defined for alpha bucket '%s'"
                               " : no alphanumeric character found" %
                               elem)
        beginIdx = ASCII_DIGITS.index(bucket[0])
        endIdx = ASCII_DIGITS.index(bucket[-1])
        spans.append(ASCII_DIGITS[beginIdx:endIdx + 1])
    return spans
class BucketPlugin(plugins.BeetsPlugin):
    """Plugin registering the `bucket` template function, which maps a
    year or a string to the name of the configured bucket it falls in.
    """

    def __init__(self):
        super(BucketPlugin, self).__init__()
        self.template_funcs['bucket'] = self._tmpl_bucket

        self.config.add({
            'bucket_year': [],
            'bucket_alpha': [],
            'extrapolate': False
        })
        self.setup()

    def setup(self):
        """Setup plugin from config options
        """
        self.year_spans = build_year_spans(self.config['bucket_year'].get())
        if self.year_spans and self.config['extrapolate']:
            # Generate the missing year spans using the most common length
            # and naming format of the declared ones.
            [self.ys_len_mode,
             self.ys_repr_mode] = extract_modes(self.year_spans)
            self.year_spans = extend_year_spans(self.year_spans,
                                                self.ys_len_mode)

        self.alpha_spans = build_alpha_spans(self.config['bucket_alpha'].get())

    def find_bucket_year(self, year):
        """Return bucket that matches given year or return the year
        if no matching bucket.
        """
        for ys in self.year_spans:
            if ys['from'] <= int(year) <= ys['to']:
                if 'str' in ys:
                    # Span declared in the configuration: use its name.
                    return ys['str']
                else:
                    # Span without a declared name: build one from the
                    # representation mode computed in setup().
                    return format_span(self.ys_repr_mode['fmt'],
                                       ys['from'], ys['to'],
                                       self.ys_repr_mode['fromnchars'],
                                       self.ys_repr_mode['tonchars'])
        return year

    def find_bucket_alpha(self, s):
        """Return alpha-range bucket that matches given string or return the
        string initial if no matching bucket.

        An empty string has no initial and is returned unchanged.
        """
        if not s:
            # s[0] below would raise IndexError.
            return s

        initial = s.lower()[0]
        for (i, span) in enumerate(self.alpha_spans):
            if initial in span:
                # alpha_spans is built in the same order as the configured
                # list, so index i gives the bucket name as declared.
                return self.config['bucket_alpha'].get()[i]
        return s[0].upper()

    def _tmpl_bucket(self, text, field=None):
        """Template function: return the bucket name for `text`.

        `text` is looked up in the year buckets when `field` is 'year', or
        when no `field` is given and `text` is made of digits only; it is
        looked up in the alpha buckets otherwise.
        """
        if not field and text.isdigit():
            field = 'year'

        if field == 'year':
            func = self.find_bucket_year
        else:
            func = self.find_bucket_alpha

        return func(text)

42
docs/plugins/bucket.rst Normal file
View file

@ -0,0 +1,42 @@
Bucket Plugin
==============
The ``bucket`` plugin helps you keep a balanced file tree for your library
by grouping your files into bucket folders that represent ranges.
This kind of file organization is typically used to classify your music by
period (e.g. *1960s*, *1970s*, etc.), or to divide bloated folders into smaller
subfolders by grouping albums/artists alphabetically (e.g. *A-F*, *G-M*, *N-Z*).
To use this plugin, enable it by including ``bucket`` in the ``plugins`` line of your
beets config. The plugin provides a template function called ``%bucket`` for
use in path format expressions::
paths:
default: /%bucket($year)/%bucket($artist)/$albumartist-$album-$year
You must then define which range representations you allow in the ``bucket:``
section of the config file::
bucket:
bucket_alpha: ['A-F', 'G-M', 'N-Z']
bucket_year: ['1980s', '1990s', '2000s']
The ``bucket_year`` parameter is used for all substitutions occurring on the
``$year`` field, while ``bucket_alpha`` takes care of the other textual fields.
The definition of a range is somewhat loose, and multiple formats are allowed:
- for alpha ranges: the range is defined by the lowest and highest (ASCII-wise) alphanumeric characters, e.g. *'ABCD'*, *'A-D'*, *'A->D'* and *'[AD]'* are equivalent.
- for year ranges: digit characters are extracted and the two extreme years define the range, e.g. *'1975-77'*, *'1975,76,77'* and *'1975-1977'* are equivalent. If no upper bound is given, the range is extended to the current year (unless a later range is defined), e.g. *'1975'* encompasses all years from 1975 until now.
If you want to group your files into many small year ranges, you don't have to
enumerate them all in the ``bucket_year`` parameter but can activate the ``extrapolate``
option instead. This option generates year bucket names by reproducing the characteristics
of the declared buckets. The following configuration::
bucket:
bucket_year: ['2000-05']
extrapolate: true
is enough to make the plugin return an enclosing five-year range for any input year.

View file

@ -58,6 +58,7 @@ by typing ``beet version``.
fromfilename
ftintitle
keyfinder
bucket
Autotagger Extensions
---------------------
@ -102,6 +103,8 @@ Path Formats
* :doc:`rewrite`: Substitute values in path formats.
* :doc:`the`: Move patterns in path formats (i.e., move "a" and "the" to the
end).
* :doc:`bucket`: Group your files into bucket directories that cover different
field value ranges.
Interoperability
----------------

136
test/test_bucket.py Normal file
View file

@ -0,0 +1,136 @@
# This file is part of beets.
# Copyright 2014, Fabrice Laporte.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Tests for the 'bucket' plugin."""
from nose.tools import raises
from _common import unittest
from beetsplug import bucket
from beets import config, ui
from helper import TestHelper
class BucketPluginTest(unittest.TestCase, TestHelper):
    """Tests of the `_tmpl_bucket` template function of the bucket plugin
    under various `bucket_year`, `bucket_alpha` and `extrapolate` settings.
    """

    def setUp(self):
        self.setup_beets()
        self.plugin = bucket.BucketPlugin()

    def tearDown(self):
        self.teardown_beets()

    def _setup_config(self, bucket_year=None, bucket_alpha=None,
                      extrapolate=False):
        """Store the given options in the 'bucket' config section and run
        the plugin setup again. Omitted bucket lists default to a new
        empty list.
        """
        config['bucket']['bucket_year'] = \
            [] if bucket_year is None else bucket_year
        config['bucket']['bucket_alpha'] = \
            [] if bucket_alpha is None else bucket_alpha
        config['bucket']['extrapolate'] = extrapolate
        self.plugin.setup()

    def test_year_single_year(self):
        """If a single year is given, range starts from this year and stops at
        the year preceding the one of next bucket."""
        self._setup_config(bucket_year=['1950s', '1970s'])
        self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950s')
        self.assertEqual(self.plugin._tmpl_bucket('1969'), '1950s')

    def test_year_single_year_last_folder(self):
        """If a single year is given for the last bucket, extend it to current
        year."""
        # The upper bound of the last bucket is the current year, so the
        # years under test are computed rather than hard-coded: fixed
        # values would make the test fail as soon as the year changes.
        from datetime import datetime
        self._setup_config(bucket_year=['1950', '1970'])
        this_year = datetime.now().year
        next_year = str(this_year + 1)
        self.assertEqual(self.plugin._tmpl_bucket(str(this_year)), '1970')
        self.assertEqual(self.plugin._tmpl_bucket(next_year), next_year)

    def test_year_two_years(self):
        """Buckets can be named with the 'from-to' syntax."""
        self._setup_config(bucket_year=['1950-59', '1960-1969'])
        self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950-59')
        self.assertEqual(self.plugin._tmpl_bucket('1969'), '1960-1969')

    def test_year_multiple_years(self):
        """Buckets can be named by listing all the years"""
        self._setup_config(bucket_year=['1950,51,52,53'])
        self.assertEqual(self.plugin._tmpl_bucket('1953'), '1950,51,52,53')
        self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')

    def test_year_out_of_range(self):
        """If no range match, return the year"""
        self._setup_config(bucket_year=['1950-59', '1960-69'])
        self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')
        self._setup_config(bucket_year=[])
        self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974')

    def test_year_out_of_range_extrapolate(self):
        """If no defined range match, extrapolate all ranges using the most
        common syntax amongst existing buckets and return the matching one."""
        self._setup_config(bucket_year=['1950-59', '1960-69'],
                           extrapolate=True)
        self.assertEqual(self.plugin._tmpl_bucket('1914'), '1910-19')
        # pick single year format
        self._setup_config(bucket_year=['1962-81', '2002', '2012'],
                           extrapolate=True)
        self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982')
        # pick from-end format
        self._setup_config(bucket_year=['1962-81', '2002', '2012-14'],
                           extrapolate=True)
        self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982-01')
        # extrapolate add ranges, but never modifies existing ones
        self._setup_config(bucket_year=['1932', '1942', '1952', '1962-81',
                                        '2002'], extrapolate=True)
        self.assertEqual(self.plugin._tmpl_bucket('1975'), '1962-81')

    def test_alpha_all_chars(self):
        """Alphabet buckets can be named by listing all their chars"""
        self._setup_config(bucket_alpha=['ABCD', 'FGH', 'IJKL'])
        self.assertEqual(self.plugin._tmpl_bucket('garry'), 'FGH')

    def test_alpha_first_last_chars(self):
        """Alphabet buckets can be named by listing the 'from-to' syntax"""
        self._setup_config(bucket_alpha=['0->9', 'A->D', 'F-H', 'I->Z'])
        self.assertEqual(self.plugin._tmpl_bucket('garry'), 'F-H')
        self.assertEqual(self.plugin._tmpl_bucket('2pac'), '0->9')

    def test_alpha_out_of_range(self):
        """If no range match, return the initial"""
        self._setup_config(bucket_alpha=['ABCD', 'FGH', 'IJKL'])
        self.assertEqual(self.plugin._tmpl_bucket('errol'), 'E')
        self._setup_config(bucket_alpha=[])
        self.assertEqual(self.plugin._tmpl_bucket('errol'), 'E')

    @raises(ui.UserError)
    def test_bad_alpha_range_def(self):
        """If bad alpha range definition, a UserError is raised"""
        self._setup_config(bucket_alpha=['$%'])
        self.assertEqual(self.plugin._tmpl_bucket('errol'), 'E')

    @raises(ui.UserError)
    def test_bad_year_range_def_no4digits(self):
        """If bad year range definition, a UserError is raised.
        Range origin must be expressed on 4 digits."""
        self._setup_config(bucket_year=['62-64'])
        # from year must be expressed on 4 digits
        self.assertEqual(self.plugin._tmpl_bucket('1963'), '62-64')

    @raises(ui.UserError)
    def test_bad_year_range_def_nodigits(self):
        """If bad year range definition, a UserError is raised.
        At least the range origin must be declared."""
        self._setup_config(bucket_year=['nodigits'])
        self.assertEqual(self.plugin._tmpl_bucket('1963'), '62-64')
def suite():
    """Return the test suite loaded from this module by name."""
    loader = unittest.TestLoader()
    module_tests = loader.loadTestsFromName(__name__)
    return module_tests
# When run as a script, execute the tests returned by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')