initial commit

2025-12-14 04:23:56 +01:00 · 2013-10-27 21:59:33 +01:00 · 2013-10-27 21:59:33 +01:00 · 8f9e35f081
commit 8f9e35f081
parent 887e02c2ae
1 changed files with 188 additions and 0 deletions
--- a/beetsplug/learn.py
+++ b/beetsplug/learn.py
@ -0,0 +1,188 @@
+# This file is part of beets.
+# Copyright 2013, Pedro Silva.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Learn things about a Beets library.
+"""
+import logging
+
+import beets
+
+PLUGIN = 'cluster'  # NOTE(review): not referenced in the visible code; the subcommand below is named 'learn' -- confirm intent
+log = logging.getLogger('beets')  # logger named 'beets'; not used in the visible code
+
+
+def _transform(items, features, kind):
+    import numpy as np
+    import sklearn.preprocessing
+    import sklearn.feature_extraction
+
+    if kind == 'categorical':
+        X = [dict((f, getattr(i, f, np.nan)) for f in features) for i in items]
+        X = sklearn.feature_extraction.DictVectorizer().fit_transform(X)
+    elif kind == 'text':
+        X = [getattr(i, f, np.nan) for f in features for i in items]
+        X = sklearn.feature_extraction.text.TfidfVectorizer().fit_transform(X)
+    elif kind == 'numeric':
+        X = [[getattr(i, f, np.nan) for f in features] for i in items]
+    else:
+        raise Exception('Dont\' know kind of feature %s' % kind)
+
+    return sklearn.preprocessing.Imputer().fit_transform(X)
+
+
+def _fit(X, k):
+    import sklearn.cluster
+    kmeans = sklearn.cluster.MiniBatchKMeans(k).fit(X)
+    return kmeans
+
+
+def _predict(kmeans, X):
+    labels = kmeans.predict(X)
+    return labels
+
+
+def _reduce(X, c):
+    import sklearn.decomposition
+    pca = sklearn.decomposition.PCA(n_components=c).fit_transform(X)
+    return pca
+
+
+def _encode(y, scale):
+    import sklearn.preprocessing
+    labels = sklearn.preprocessing.LabelEncoder().fit_transform(y)
+    if scale:
+        labels = labels.reshape((len(y), 1)).astype(float)
+        labels = sklearn.preprocessing.MinMaxScaler().fit_transform(labels)
+        labels = labels.reshape((len(y),))
+    return labels
+
+
+def _plot(X, groups, savefig):
+    import matplotlib.cm
+    import matplotlib.pyplot
+
+    n_samples, n_features = X.shape
+    fig = matplotlib.pyplot.figure()
+    colors = matplotlib.cm.jet(_encode(groups, True))
+
+    if n_features == 1:
+        matplotlib.pyplot.scatter(xrange(len(X)), X[:, 0], c=colors, s=30)
+    elif n_features == 2:
+        matplotlib.pyplot.scatter(X[:, 0], X[:, 1], c=colors, s=30)
+    elif n_features >= 3:
+        import mpl_toolkits.mplot3d
+        if n_features > 3:
+            X = _reduce(X, 3)
+        ax = fig.add_subplot(111, projection='3d')
+        ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=colors, s=30)
+
+    if savefig:
+        matplotlib.pyplot.savefig(savefig)
+    else:
+        matplotlib.pyplot.show()
+
+
+class LearnPlugin(beets.plugins.BeetsPlugin):
+    '''Learn things about a Beets library.
+    '''
+    def __init__(self):
+        super(LearnPlugin, self).__init__()
+
+        self.config.add({'attributes': []})
+        self.config.add({'clusters': 2})
+        self.config.add({'format': ''})
+        self.config.add({'kind': 'numeric'})
+        self.config.add({'plot': False})
+        self.config.add({'savefig': False})
+        self.config.add({'test': []})
+        self.config.add({'train': []})
+
+        self._command = beets.ui.Subcommand('learn', help=__doc__)
+
+        self._command.parser.add_option('-a', '--attributes',
+                                        action='callback', dest='attributes',
+                                        metavar='LIST',
+                                        callback=beets.ui.vararg_callback,
+                                        help='list of attributes to cluster')
+
+        self._command.parser.add_option('-c', '--clusters',
+                                        action='store', metavar='K',
+                                        type=int,
+                                        help='how many clusters to find')
+
+        self._command.parser.add_option('-f', '--format',
+                                        action='store', type=str,
+                                        help='print with custom format',
+                                        metavar='FMT')
+
+        self._command.parser.add_option('-k', '--kind',
+                                        action='store',
+                                        choices=['numeric',
+                                                 'categorical',
+                                                 'text'],
+                                        help='type of attributes (numeric, \
+                                        categorical numeric]')
+
+        self._command.parser.add_option('-p', '--plot',
+                                        action='store_true',
+                                        help='plot results')
+
+        self._command.parser.add_option('-s', '--savefig',
+                                        action='store',
+                                        help='plot results to file')
+
+        self._command.parser.add_option('-T', '--test',
+                                        action='callback', dest='test',
+                                        metavar='QUERY',
+                                        callback=beets.ui.vararg_callback,
+                                        help='test set query')
+
+        self._command.parser.add_option('-t', '--train',
+                                        action='callback', dest='train',
+                                        metavar='QUERY',
+                                        callback=beets.ui.vararg_callback,
+                                        help='training set query')
+
+    def commands(self):
+        def _learn(lib, opts, args):
+
+            self.config.set_args(opts)
+            kind = self.config['kind'].get(str)
+            features = self.config['attributes'].get(list)
+            fmt = self.config['format'].get(str)
+            k = self.config['clusters'].get(int)
+            plot = self.config['plot'].get(bool)
+            savefig = self.config['savefig'].get(str)
+            test = self.config['test'].get(list)
+            train = self.config['train'].get(list)
+            if not fmt:
+                fmt = '$albumartist - $album - $title'
+            fmt += ' - {0}'
+
+            items = lib.items(beets.ui.decargs(train or args))
+            X = _transform(items, features, kind)
+            kmeans = _fit(X, k)
+
+            items = lib.items(beets.ui.decargs(test or args))
+            X = _transform(items, features, kind)
+            labels = _predict(kmeans, X)
+
+            if plot or savefig:
+                _plot(X, labels, savefig)
+
+            for item, label in zip(items, labels):
+                beets.ui.print_obj(item, lib, fmt=fmt.format(label))
+
+        self._command.func = _learn
+        return [self._command]