diff --git a/beetsplug/bucket.py b/beetsplug/bucket.py new file mode 100644 index 000000000..f31339aa1 --- /dev/null +++ b/beetsplug/bucket.py @@ -0,0 +1,230 @@ +# This file is part of beets. +# Copyright 2014, Fabrice Laporte. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Provides %bucket_alpha and %bucket_year functions for path formatting. +""" + +from datetime import datetime +import logging +import re +import string +from itertools import tee, izip +from beets import plugins, ui + +log = logging.getLogger('beets') + + +class BucketError(Exception): + pass + + +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." + a, b = tee(iterable) + next(b, None) + return izip(a, b) + + +def span_from_str(span_str): + """Build a span dict from the span string representation. 
+ """ + + def normalize_year(d, yearfrom): + """Convert string to a 4 digits year + """ + if yearfrom < 100: + raise BucketError("%d must be expressed on 4 digits" % yearfrom) + + # if two digits only, pick closest year that ends by these two + # digits starting from yearfrom + if d < 100: + if (d % 100) < (yearfrom % 100): + d = (yearfrom - yearfrom % 100) + 100 + d + else: + d = (yearfrom - yearfrom % 100) + d + return d + + years = [int(x) for x in re.findall('\d+', span_str)] + if not years: + raise ui.UserError("invalid range defined for year bucket '%s': no " + "year found" % span_str) + try: + years = [normalize_year(x, years[0]) for x in years] + except BucketError as exc: + raise ui.UserError("invalid range defined for year bucket '%s': %s" % + (span_str, exc)) + + res = {'from': years[0], 'str': span_str} + if len(years) > 1: + res['to'] = years[-1] + return res + + +def complete_year_spans(spans): + """Set the `to` value of spans if empty and sort them chronologically. + """ + spans.sort(key=lambda x: x['from']) + for (x, y) in pairwise(spans): + if 'to' not in x: + x['to'] = y['from'] - 1 + if spans and 'to' not in spans[-1]: + spans[-1]['to'] = datetime.now().year + + +def extend_year_spans(spans, spanlen, start=1900, end=2014): + """Add new spans to given spans list so that every year of [start,end] + belongs to a span. 
+ """ + extended_spans = spans[:] + for (x, y) in pairwise(spans): + # if a gap between two spans, fill the gap with as much spans of + # spanlen length as necessary + for span_from in range(x['to'] + 1, y['from'], spanlen): + extended_spans.append({'from': span_from}) + # Create spans prior to declared ones + for span_from in range(spans[0]['from'] - spanlen, start, -spanlen): + extended_spans.append({'from': span_from}) + # Create spans after the declared ones + for span_from in range(spans[-1]['to'] + 1, end, spanlen): + extended_spans.append({'from': span_from}) + + complete_year_spans(extended_spans) + return extended_spans + + +def build_year_spans(year_spans_str): + """Build a chronologically ordered list of spans dict from unordered spans + stringlist. + """ + spans = [] + for elem in year_spans_str: + spans.append(span_from_str(elem)) + complete_year_spans(spans) + return spans + + +def str2fmt(s): + """Deduces formatting syntax from a span string. + """ + regex = re.compile("(?P<bef>\D*)(?P<fromyear>\d+)(?P<sep>\D*)" + "(?P<toyear>\d*)(?P<after>\D*)") + m = re.match(regex, s) + + def year_format(year): + return '%%0%dd' % len(year) + + res = {'fromnchars': len(m.group('fromyear')), + 'tonchars': len(m.group('toyear'))} + res['fmt'] = "%s%%s%s%s%s" % (m.group('bef'), + m.group('sep'), + '%s' if res['tonchars'] else '', + m.group('after')) + return res + + +def format_span(fmt, yearfrom, yearto, fromnchars, tonchars): + """Return a span string representation. 
+ """ + args = (str(yearfrom)[-fromnchars:]) + if tonchars: + args = (str(yearfrom)[-fromnchars:], str(yearto)[-tonchars:]) + return fmt % args + + +def extract_modes(spans): + """Extract the most common spans lengths and representation formats + """ + rangelen = sorted([x['to'] - x['from'] + 1 for x in spans]) + deflen = sorted(rangelen, key=rangelen.count)[-1] + reprs = [str2fmt(x['str']) for x in spans] + deffmt = sorted(reprs, key=reprs.count)[-1] + return deflen, deffmt + + +def build_alpha_spans(alpha_spans_str): + """Extract alphanumerics from string and return sorted list of chars + [from...to] + """ + spans = [] + ASCII_DIGITS = string.digits + string.ascii_lowercase + for elem in alpha_spans_str: + bucket = sorted([x for x in elem.lower() if x.isalnum()]) + if bucket: + beginIdx = ASCII_DIGITS.index(bucket[0]) + endIdx = ASCII_DIGITS.index(bucket[-1]) + else: + raise ui.UserError("invalid range defined for alpha bucket '%s'" + " : no alphanumeric character found" % + elem) + spans.append(ASCII_DIGITS[beginIdx:endIdx + 1]) + return spans + + +class BucketPlugin(plugins.BeetsPlugin): + def __init__(self): + super(BucketPlugin, self).__init__() + self.template_funcs['bucket'] = self._tmpl_bucket + + self.config.add({ + 'bucket_year': [], + 'bucket_alpha': [], + 'extrapolate': False + }) + self.setup() + + def setup(self): + """Setup plugin from config options + """ + self.year_spans = build_year_spans(self.config['bucket_year'].get()) + if self.year_spans and self.config['extrapolate']: + [self.ys_len_mode, + self.ys_repr_mode] = extract_modes(self.year_spans) + self.year_spans = extend_year_spans(self.year_spans, + self.ys_len_mode) + + self.alpha_spans = build_alpha_spans(self.config['bucket_alpha'].get()) + + def find_bucket_year(self, year): + """Return bucket that matches given year or return the year + if no matching bucket. 
+ """ + for ys in self.year_spans: + if ys['from'] <= int(year) <= ys['to']: + if 'str' in ys: + return ys['str'] + else: + return format_span(self.ys_repr_mode['fmt'], + ys['from'], ys['to'], + self.ys_repr_mode['fromnchars'], + self.ys_repr_mode['tonchars']) + return year + + def find_bucket_alpha(self, s): + """Return alpha-range bucket that matches given string or return the + string initial if no matching bucket. + """ + for (i, span) in enumerate(self.alpha_spans): + if s.lower()[0] in span: + return self.config['bucket_alpha'].get()[i] + return s[0].upper() + + def _tmpl_bucket(self, text, field=None): + if not field and text.isdigit(): + field = 'year' + + if field == 'year': + func = self.find_bucket_year + else: + func = self.find_bucket_alpha + return func(text) diff --git a/docs/changelog.rst b/docs/changelog.rst index fc2a7f1e3..8a68a7302 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,9 @@ Changelog * :doc:`/plugins/ftintitle`: A new option lets you remove featured artists entirely instead of moving them to the title. Thanks to SUTJael. * Item and album queries are much faster. +* The new :doc:`/plugins/bucket` provides a ``%bucket{}`` function for path + formatting to generate folder names representing alphabetic or year ranges. + Thanks to Fabrice Laporte. Fixes: diff --git a/docs/plugins/bucket.rst b/docs/plugins/bucket.rst new file mode 100644 index 000000000..c4f05d2fe --- /dev/null +++ b/docs/plugins/bucket.rst @@ -0,0 +1,43 @@ +Bucket Plugin +============== + +The ``bucket`` plugin helps you keep a balanced file tree for your library +by grouping your files into bucket folders representing ranges. +This kind of file organization is usually used to classify your music by +periods (e.g. *1960s*, *1970s* etc.), or to divide bloated folders into smaller +subfolders by grouping albums/artists alphabetically (e.g. *A-F*, *G-M*, *N-Z*). + +To use this plugin, enable it by including ``bucket`` in the ``plugins`` line of your +beets config. 
The plugin provides a template function called ``%bucket`` for +use in path format expressions:: + + paths: + default: /%bucket{$year}/%bucket{$artist}/$albumartist-$album-$year + +You must then define which range representations you allow in the ``bucket:`` +section of the config file:: + + bucket: + bucket_alpha: ['A-F', 'G-M', 'N-Z'] + bucket_year: ['1980s', '1990s', '2000s'] + +The ``bucket_year`` parameter is used for all substitutions occurring on the +``$year`` field, while ``bucket_alpha`` takes care of the other textual fields. + +The definition of a range is somewhat loose, and multiple formats are allowed: + +- for alpha ranges: the range is defined by the lowest and highest (ASCII-wise) alphanumeric characters, e.g. *'ABCD'*, *'A-D'*, *'A->D'* and *'[AD]'* are equivalent. +- for year ranges: digit characters are extracted and the two extreme years define the range, e.g. *'1975-77'*, *'1975,76,77'* and *'1975-1977'* are equivalent. If no upper bound is given, the range is extended to the current year (unless a later range is defined), e.g. *'1975'* encompasses all years from 1975 until now. + +If you want to group your files into multiple year ranges, you don't have to +enumerate them all in the ``bucket_year`` parameter but can activate the ``extrapolate`` +option instead. This option will generate year bucket names by reproducing characteristics +of declared buckets:: + + bucket: + bucket_year: ['2000-05'] + extrapolate: true + +is enough to make the plugin return an enclosing six-year range (the length of the declared bucket) for any input year. + + diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index 05a0b7aef..eab7a69b9 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -58,6 +58,7 @@ by typing ``beet version``. fromfilename ftintitle keyfinder + bucket Autotagger Extensions --------------------- @@ -102,6 +103,8 @@ Path Formats * :doc:`rewrite`: Substitute values in path formats. * :doc:`the`: Move patterns in path formats (i.e., move "a" and "the" to the end). 
+* :doc:`bucket`: Group your files into bucket directories that cover different + ranges of field values. Interoperability ---------------- diff --git a/docs/reference/pathformat.rst b/docs/reference/pathformat.rst index 0108bcc56..26496f293 100644 --- a/docs/reference/pathformat.rst +++ b/docs/reference/pathformat.rst @@ -80,7 +80,7 @@ These functions are built in to beets: .. _strftime: http://docs.python.org/2/library/time.html#time.strftime Plugins can extend beets with more template functions (see -:ref:`writing-plugins`). +:ref:`templ_plugins`). .. _aunique: @@ -158,7 +158,7 @@ Available Values Here's a list of the different values available to path formats. The current list can be found definitively by running the command ``beet fields``. Note that plugins can add new (or replace existing) template values (see -:ref:`writing-plugins`). +:ref:`templ_plugins`). Ordinary metadata: @@ -227,3 +227,24 @@ Library metadata: * mtime: The modification time of the audio file. * added: The date and time that the music was added to your library. + +.. _templ_plugins: + +Template functions and values provided by plugins +------------------------------------------------- + +Remember to activate the corresponding plugin before using one of these additional +fields/functions: + +* missing by :doc:`/plugins/missing`: number of missing tracks per album +* ``%bucket{text}`` by :doc:`/plugins/bucket`: substitute a string by the + range it belongs to +* ``%the{text}`` by :doc:`/plugins/the`: moves English articles to the end of + strings + +In case you need a field not mentioned above, +:doc:`/plugins/inline` lets you define template fields in your beets +configuration file using Python snippets. +And for more advanced processing, you can go all-in and write a dedicated +plugin to register your own fields/functions (see +:ref:`writing-plugins`). 
diff --git a/test/test_bucket.py b/test/test_bucket.py new file mode 100644 index 000000000..f8b86bf6d --- /dev/null +++ b/test/test_bucket.py @@ -0,0 +1,136 @@ +# This file is part of beets. +# Copyright 2014, Fabrice Laporte. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Tests for the 'bucket' plugin.""" + +from nose.tools import raises +from _common import unittest +from beetsplug import bucket +from beets import config, ui + +from helper import TestHelper + + +class BucketPluginTest(unittest.TestCase, TestHelper): + def setUp(self): + self.setup_beets() + self.plugin = bucket.BucketPlugin() + + def tearDown(self): + self.teardown_beets() + + def _setup_config(self, bucket_year=[], bucket_alpha=[], + extrapolate=False): + config['bucket']['bucket_year'] = bucket_year + config['bucket']['bucket_alpha'] = bucket_alpha + config['bucket']['extrapolate'] = extrapolate + self.plugin.setup() + + def test_year_single_year(self): + """If a single year is given, range starts from this year and stops at + the year preceding the one of next bucket.""" + self._setup_config(bucket_year=['1950s', '1970s']) + self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950s') + self.assertEqual(self.plugin._tmpl_bucket('1969'), '1950s') + + def test_year_single_year_last_folder(self): + """If a single year is given for the last bucket, extend it to current + year.""" + self._setup_config(bucket_year=['1950', '1970']) + 
self.assertEqual(self.plugin._tmpl_bucket('2014'), '1970') + self.assertEqual(self.plugin._tmpl_bucket('2015'), '2015') + + def test_year_two_years(self): + """Buckets can be named with the 'from-to' syntax.""" + self._setup_config(bucket_year=['1950-59', '1960-1969']) + self.assertEqual(self.plugin._tmpl_bucket('1959'), '1950-59') + self.assertEqual(self.plugin._tmpl_bucket('1969'), '1960-1969') + + def test_year_multiple_years(self): + """Buckets can be named by listing all the years""" + self._setup_config(bucket_year=['1950,51,52,53']) + self.assertEqual(self.plugin._tmpl_bucket('1953'), '1950,51,52,53') + self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974') + + def test_year_out_of_range(self): + """If no range match, return the year""" + self._setup_config(bucket_year=['1950-59', '1960-69']) + self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974') + self._setup_config(bucket_year=[]) + self.assertEqual(self.plugin._tmpl_bucket('1974'), '1974') + + def test_year_out_of_range_extrapolate(self): + """If no defined range match, extrapolate all ranges using the most + common syntax amongst existing buckets and return the matching one.""" + self._setup_config(bucket_year=['1950-59', '1960-69'], + extrapolate=True) + self.assertEqual(self.plugin._tmpl_bucket('1914'), '1910-19') + # pick single year format + self._setup_config(bucket_year=['1962-81', '2002', '2012'], + extrapolate=True) + self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982') + # pick from-end format + self._setup_config(bucket_year=['1962-81', '2002', '2012-14'], + extrapolate=True) + self.assertEqual(self.plugin._tmpl_bucket('1983'), '1982-01') + # extrapolate add ranges, but never modifies existing ones + self._setup_config(bucket_year=['1932', '1942', '1952', '1962-81', + '2002'], extrapolate=True) + self.assertEqual(self.plugin._tmpl_bucket('1975'), '1962-81') + + def test_alpha_all_chars(self): + """Alphabet buckets can be named by listing all their chars""" + 
self._setup_config(bucket_alpha=['ABCD', 'FGH', 'IJKL']) + self.assertEqual(self.plugin._tmpl_bucket('garry'), 'FGH') + + def test_alpha_first_last_chars(self): + """Alphabet buckets can be named by listing the 'from-to' syntax""" + self._setup_config(bucket_alpha=['0->9', 'A->D', 'F-H', 'I->Z']) + self.assertEqual(self.plugin._tmpl_bucket('garry'), 'F-H') + self.assertEqual(self.plugin._tmpl_bucket('2pac'), '0->9') + + def test_alpha_out_of_range(self): + """If no range match, return the initial""" + self._setup_config(bucket_alpha=['ABCD', 'FGH', 'IJKL']) + self.assertEqual(self.plugin._tmpl_bucket('errol'), 'E') + self._setup_config(bucket_alpha=[]) + self.assertEqual(self.plugin._tmpl_bucket('errol'), 'E') + + @raises(ui.UserError) + def test_bad_alpha_range_def(self): + """If bad alpha range definition, a UserError is raised""" + self._setup_config(bucket_alpha=['$%']) + self.assertEqual(self.plugin._tmpl_bucket('errol'), 'E') + + @raises(ui.UserError) + def test_bad_year_range_def_no4digits(self): + """If bad year range definition, a UserError is raised. + Range origin must be expressed on 4 digits.""" + self._setup_config(bucket_year=['62-64']) + # from year must be expressed on 4 digits + self.assertEqual(self.plugin._tmpl_bucket('1963'), '62-64') + + @raises(ui.UserError) + def test_bad_year_range_def_nodigits(self): + """If bad year range definition, a UserError is raised. + At least the range origin must be declared.""" + self._setup_config(bucket_year=['nodigits']) + self.assertEqual(self.plugin._tmpl_bucket('1963'), '62-64') + + +def suite(): + return unittest.TestLoader().loadTestsFromName(__name__) + +if __name__ == '__main__': + unittest.main(defaultTest='suite')