Merge pull request #3206 from arcresu/random-util

random: move functionality into beets.random
This commit is contained in:
Adrian Sampson 2019-04-07 11:59:39 -04:00 committed by GitHub
commit 362722c87c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 198 additions and 91 deletions

115
beets/random.py Normal file
View file

@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2016, Philippe Mongeau.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Get a random song or album from the library.
"""
from __future__ import division, absolute_import, print_function
import random
from operator import attrgetter
from itertools import groupby
def _length(obj, album):
"""Get the duration of an item or album.
"""
if album:
return sum(i.length for i in obj.items())
else:
return obj.length
def _equal_chance_permutation(objs, field='albumartist', random_gen=None):
"""Generate (lazily) a permutation of the objects where every group
with equal values for `field` have an equal chance of appearing in
any given position.
"""
rand = random_gen or random
# Group the objects by artist so we can sample from them.
key = attrgetter(field)
objs.sort(key=key)
objs_by_artists = {}
for artist, v in groupby(objs, key):
objs_by_artists[artist] = list(v)
# While we still have artists with music to choose from, pick one
# randomly and pick a track from that artist.
while objs_by_artists:
# Choose an artist and an object for that artist, removing
# this choice from the pool.
artist = rand.choice(list(objs_by_artists.keys()))
objs_from_artist = objs_by_artists[artist]
i = rand.randint(0, len(objs_from_artist) - 1)
yield objs_from_artist.pop(i)
# Remove the artist if we've used up all of its objects.
if not objs_from_artist:
del objs_by_artists[artist]
def _take(iter, num):
"""Return a list containing the first `num` values in `iter` (or
fewer, if the iterable ends early).
"""
out = []
for val in iter:
out.append(val)
num -= 1
if num <= 0:
break
return out
def _take_time(iter, secs, album):
    """Collect objects from `iter` whose combined duration fits in
    `secs`.

    Every object in `iter` is examined in order. An object is kept when
    adding its duration (as reported by `_length(obj, album)`) to the
    running total does not exceed `secs`; otherwise it is skipped and
    the scan continues with the next object. Returns the kept objects
    as a list.
    """
    chosen = []
    elapsed = 0.0
    for candidate in iter:
        projected = elapsed + _length(candidate, album)
        if projected <= secs:
            chosen.append(candidate)
            elapsed = projected
    return chosen
def random_objs(objs, album, number=1, time=None, equal_chance=False,
                random_gen=None):
    """Get a random subset of the provided `objs`.

    If `time` is provided (and non-zero), select a list whose total
    duration does not exceed that number of minutes. Otherwise, produce
    up to `number` matches. If `equal_chance` is true, give each artist
    an equal chance of being included so that artists with more songs
    are not represented disproportionately.

    `album` indicates whether `objs` are albums rather than items; it
    is only consulted when measuring durations for `time`. `random_gen`
    is an optional random number generator (for example a seeded
    `random.Random`); the global `random` module is used when it is
    omitted. When `equal_chance` is false, `objs` is shuffled in place.
    """
    rand = random_gen or random

    # Permute the objects either in a straightforward way or an
    # artist-balanced way.
    if equal_chance:
        # Forward the generator so that a caller-supplied (seeded)
        # source of randomness is honoured in this mode as well.
        perm = _equal_chance_permutation(objs, random_gen=rand)
    else:
        perm = objs
        rand.shuffle(perm)  # N.B. This shuffles the original list.

    # Select objects by time or count.
    if time:
        return _take_time(perm, time * 60, album)
    else:
        return _take(perm, number)

View file

@ -19,97 +19,7 @@ from __future__ import division, absolute_import, print_function
from beets.plugins import BeetsPlugin
from beets.ui import Subcommand, decargs, print_
import random
from operator import attrgetter
from itertools import groupby
def _length(obj, album):
"""Get the duration of an item or album.
"""
if album:
return sum(i.length for i in obj.items())
else:
return obj.length
def _equal_chance_permutation(objs, field='albumartist'):
"""Generate (lazily) a permutation of the objects where every group
with equal values for `field` have an equal chance of appearing in
any given position.
"""
# Group the objects by artist so we can sample from them.
key = attrgetter(field)
objs.sort(key=key)
objs_by_artists = {}
for artist, v in groupby(objs, key):
objs_by_artists[artist] = list(v)
# While we still have artists with music to choose from, pick one
# randomly and pick a track from that artist.
while objs_by_artists:
# Choose an artist and an object for that artist, removing
# this choice from the pool.
artist = random.choice(list(objs_by_artists.keys()))
objs_from_artist = objs_by_artists[artist]
i = random.randint(0, len(objs_from_artist) - 1)
yield objs_from_artist.pop(i)
# Remove the artist if we've used up all of its objects.
if not objs_from_artist:
del objs_by_artists[artist]
def _take(iter, num):
"""Return a list containing the first `num` values in `iter` (or
fewer, if the iterable ends early).
"""
out = []
for val in iter:
out.append(val)
num -= 1
if num <= 0:
break
return out
def _take_time(iter, secs, album):
    """Pick, in order, the objects from `iter` that fit into a budget
    of `secs`.

    The duration of each object is obtained with `_length(obj, album)`.
    An object is added to the result only if the running total stays at
    or below `secs` once it is included; objects that would overshoot
    are passed over and the remaining ones are still considered.
    Returns the selected objects as a list.
    """
    picked = []
    used = 0.0
    for entry in iter:
        duration = _length(entry, album)
        fits = used + duration <= secs
        if fits:
            picked.append(entry)
            used += duration
    return picked
def random_objs(objs, album, number=1, time=None, equal_chance=False):
    """Get a random subset of the provided `objs`.

    A truthy `time` takes precedence: the selection is then made with
    `_take_time` using `time * 60` as the limit. Otherwise `_take` is
    used to return up to `number` objects. With `equal_chance` set, the
    order comes from `_equal_chance_permutation`; without it, `objs` is
    shuffled in place with the global `random` module. `album` is
    passed through to `_take_time`.
    """
    # Permute the objects either in a straightforward way or an
    # artist-balanced way.
    if equal_chance:
        candidates = _equal_chance_permutation(objs)
    else:
        random.shuffle(objs)  # N.B. This shuffles the original list.
        candidates = objs

    # Select objects by time or count.
    if not time:
        return _take(candidates, number)
    return _take_time(candidates, time * 60, album)
from beets.random import random_objs
def random_func(lib, opts, args):

82
test/test_random.py Normal file
View file

@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
# This file is part of beets.
# Copyright 2019, Carl Suster
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Test the beets.random utilities associated with the random plugin.
"""
from __future__ import division, absolute_import, print_function
import unittest
from test.helper import TestHelper
import math
from random import Random
from beets import random
class RandomTest(unittest.TestCase, TestHelper):
    """Statistical checks for `beets.random._equal_chance_permutation`.

    The fixture holds ten items: one created with artist 'Artist 1' and
    nine created with artist 'Artist 2'. A `Random` instance seeded
    with 12345 drives every permutation.
    """

    def setUp(self):
        self.lib = None
        self.artist1 = 'Artist 1'
        self.artist2 = 'Artist 2'
        self.item1 = self.create_item(artist=self.artist1)
        self.item2 = self.create_item(artist=self.artist2)
        self.items = [self.item1, self.item2]
        self.items.extend(
            self.create_item(artist=self.artist2) for _ in range(8))
        self.random_gen = Random(12345)

    def tearDown(self):
        pass

    def _stats(self, data):
        """Return `(mean, sample standard deviation, median)` of
        `data`.
        """
        count = len(data)
        ordered = sorted(data)
        mean = sum(data) / count
        variance = sum((p - mean) ** 2 for p in data) / (count - 1)
        half, is_odd = divmod(count, 2)
        if is_odd:
            median = ordered[half]
        else:
            # Even count: average the two central values.
            median = sum(ordered[half - 1:half + 1]) / 2
        return mean, math.sqrt(variance), median

    def test_equal_permutation(self):
        """Only one item belongs to artist1; all the others belong to
        artist2. Permuting with groups formed on the artist field
        should place the lone track near the front almost every time,
        whereas grouping on a different field should leave it around
        the middle on average.
        """
        def experiment(field, histogram=False):
            """Run 500 permutations of the items grouped by `field`,
            record where self.item1 lands each time, and return the
            statistics of those positions.
            """
            positions = [
                list(random._equal_chance_permutation(
                    self.items, field=field,
                    random_gen=self.random_gen)).index(self.item1)
                for _ in range(500)
            ]
            # Print a histogram (useful for debugging).
            if histogram:
                for slot, _ in enumerate(self.items):
                    print('{:2d} {}'.format(
                        slot, '*' * positions.count(slot)))
            return self._stats(positions)

        mean1, stdev1, median1 = experiment('artist')
        mean2, stdev2, median2 = experiment('track')
        self.assertAlmostEqual(0, median1, delta=1)
        self.assertAlmostEqual(len(self.items) // 2, median2, delta=1)
        self.assertGreater(stdev2, stdev1)