Merge pull request #3478 from ybnd/parallel-replaygain

Implement parallel replaygain analysis
This commit is contained in:
Adrian Sampson 2020-12-14 17:41:11 -05:00 committed by GitHub
commit 8645f56512
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 274 additions and 45 deletions

View file

@ -24,12 +24,17 @@ import warnings
import enum import enum
import re import re
import xml.parsers.expat import xml.parsers.expat
from six.moves import zip from six.moves import zip, queue
import six
from multiprocessing.pool import ThreadPool, RUN
from threading import Thread, Event
import signal
from beets import ui from beets import ui
from beets.plugins import BeetsPlugin from beets.plugins import BeetsPlugin
from beets.util import (syspath, command_output, bytestring_path, from beets.util import (syspath, command_output, bytestring_path,
displayable_path, py3_path) displayable_path, py3_path, cpu_count)
# Utilities. # Utilities.
@ -110,6 +115,8 @@ class Backend(object):
"""An abstract class representing engine for calculating RG values. """An abstract class representing engine for calculating RG values.
""" """
do_parallel = False
def __init__(self, config, log): def __init__(self, config, log):
"""Initialize the backend with the configuration view for the """Initialize the backend with the configuration view for the
plugin. plugin.
@ -141,6 +148,8 @@ class Bs1770gainBackend(Backend):
-18: "replaygain", -18: "replaygain",
} }
do_parallel = True
def __init__(self, config, log): def __init__(self, config, log):
super(Bs1770gainBackend, self).__init__(config, log) super(Bs1770gainBackend, self).__init__(config, log)
config.add({ config.add({
@ -352,8 +361,7 @@ class Bs1770gainBackend(Backend):
except xml.parsers.expat.ExpatError: except xml.parsers.expat.ExpatError:
raise ReplayGainError( raise ReplayGainError(
u'The bs1770gain tool produced malformed XML. ' u'The bs1770gain tool produced malformed XML. '
'Using version >=0.4.10 may solve this problem.' u'Using version >=0.4.10 may solve this problem.')
)
if len(per_file_gain) != len(path_list): if len(per_file_gain) != len(path_list):
raise ReplayGainError( raise ReplayGainError(
@ -378,6 +386,9 @@ class Bs1770gainBackend(Backend):
class FfmpegBackend(Backend): class FfmpegBackend(Backend):
"""A replaygain backend using ffmpeg's ebur128 filter. """A replaygain backend using ffmpeg's ebur128 filter.
""" """
do_parallel = True
def __init__(self, config, log): def __init__(self, config, log):
super(FfmpegBackend, self).__init__(config, log) super(FfmpegBackend, self).__init__(config, log)
self._ffmpeg_path = "ffmpeg" self._ffmpeg_path = "ffmpeg"
@ -620,6 +631,7 @@ class FfmpegBackend(Backend):
# mpgain/aacgain CLI tool backend. # mpgain/aacgain CLI tool backend.
class CommandBackend(Backend): class CommandBackend(Backend):
do_parallel = True
def __init__(self, config, log): def __init__(self, config, log):
super(CommandBackend, self).__init__(config, log) super(CommandBackend, self).__init__(config, log)
@ -748,7 +760,6 @@ class CommandBackend(Backend):
# GStreamer-based backend. # GStreamer-based backend.
class GStreamerBackend(Backend): class GStreamerBackend(Backend):
def __init__(self, config, log): def __init__(self, config, log):
super(GStreamerBackend, self).__init__(config, log) super(GStreamerBackend, self).__init__(config, log)
self._import_gst() self._import_gst()
@ -1168,6 +1179,33 @@ class AudioToolsBackend(Backend):
) )
class ExceptionWatcher(Thread):
"""Monitors a queue for exceptions asynchronously.
Once an exception occurs, raise it and execute a callback.
"""
def __init__(self, queue, callback):
self._queue = queue
self._callback = callback
self._stopevent = Event()
Thread.__init__(self)
def run(self):
while not self._stopevent.is_set():
try:
exc = self._queue.get_nowait()
self._callback()
six.reraise(exc[0], exc[1], exc[2])
except queue.Empty:
# No exceptions yet, loop back to check
# whether `_stopevent` is set
pass
def join(self, timeout=None):
self._stopevent.set()
Thread.join(self, timeout)
# Main plugin logic. # Main plugin logic.
class ReplayGainPlugin(BeetsPlugin): class ReplayGainPlugin(BeetsPlugin):
@ -1195,6 +1233,7 @@ class ReplayGainPlugin(BeetsPlugin):
'overwrite': False, 'overwrite': False,
'auto': True, 'auto': True,
'backend': u'command', 'backend': u'command',
'threads': cpu_count(),
'per_disc': False, 'per_disc': False,
'peak': 'true', 'peak': 'true',
'targetlevel': 89, 'targetlevel': 89,
@ -1204,12 +1243,15 @@ class ReplayGainPlugin(BeetsPlugin):
self.overwrite = self.config['overwrite'].get(bool) self.overwrite = self.config['overwrite'].get(bool)
self.per_disc = self.config['per_disc'].get(bool) self.per_disc = self.config['per_disc'].get(bool)
backend_name = self.config['backend'].as_str()
if backend_name not in self.backends: # Remember which backend is used for CLI feedback
self.backend_name = self.config['backend'].as_str()
if self.backend_name not in self.backends:
raise ui.UserError( raise ui.UserError(
u"Selected ReplayGain backend {0} is not supported. " u"Selected ReplayGain backend {0} is not supported. "
u"Please select one of: {1}".format( u"Please select one of: {1}".format(
backend_name, self.backend_name,
u', '.join(self.backends.keys()) u', '.join(self.backends.keys())
) )
) )
@ -1226,13 +1268,15 @@ class ReplayGainPlugin(BeetsPlugin):
# On-import analysis. # On-import analysis.
if self.config['auto']: if self.config['auto']:
self.register_listener('import_begin', self.import_begin)
self.register_listener('import', self.import_end)
self.import_stages = [self.imported] self.import_stages = [self.imported]
# Formats to use R128. # Formats to use R128.
self.r128_whitelist = self.config['r128'].as_str_seq() self.r128_whitelist = self.config['r128'].as_str_seq()
try: try:
self.backend_instance = self.backends[backend_name]( self.backend_instance = self.backends[self.backend_name](
self.config, self._log self.config, self._log
) )
except (ReplayGainError, FatalReplayGainError) as e: except (ReplayGainError, FatalReplayGainError) as e:
@ -1264,30 +1308,40 @@ class ReplayGainPlugin(BeetsPlugin):
(not item.rg_album_gain or not item.rg_album_peak) (not item.rg_album_gain or not item.rg_album_peak)
for item in album.items()]) for item in album.items()])
def _store(self, item):
"""Store an item to the database.
When testing, item.store() sometimes fails non-destructively with
sqlite.OperationalError.
This method is here to be patched to a retry-once helper function
in test_replaygain.py, so that it can still fail appropriately
outside of these tests.
"""
item.store()
def store_track_gain(self, item, track_gain): def store_track_gain(self, item, track_gain):
item.rg_track_gain = track_gain.gain item.rg_track_gain = track_gain.gain
item.rg_track_peak = track_gain.peak item.rg_track_peak = track_gain.peak
item.store() self._store(item)
self._log.debug(u'applied track gain {0} LU, peak {1} of FS', self._log.debug(u'applied track gain {0} LU, peak {1} of FS',
item.rg_track_gain, item.rg_track_peak) item.rg_track_gain, item.rg_track_peak)
def store_album_gain(self, item, album_gain): def store_album_gain(self, item, album_gain):
item.rg_album_gain = album_gain.gain item.rg_album_gain = album_gain.gain
item.rg_album_peak = album_gain.peak item.rg_album_peak = album_gain.peak
item.store() self._store(item)
self._log.debug(u'applied album gain {0} LU, peak {1} of FS', self._log.debug(u'applied album gain {0} LU, peak {1} of FS',
item.rg_album_gain, item.rg_album_peak) item.rg_album_gain, item.rg_album_peak)
def store_track_r128_gain(self, item, track_gain): def store_track_r128_gain(self, item, track_gain):
item.r128_track_gain = track_gain.gain item.r128_track_gain = track_gain.gain
item.store() self._store(item)
self._log.debug(u'applied r128 track gain {0} LU', self._log.debug(u'applied r128 track gain {0} LU',
item.r128_track_gain) item.r128_track_gain)
def store_album_r128_gain(self, item, album_gain): def store_album_r128_gain(self, item, album_gain):
item.r128_album_gain = album_gain.gain item.r128_album_gain = album_gain.gain
item.store() self._store(item)
self._log.debug(u'applied r128 album gain {0} LU', self._log.debug(u'applied r128 album gain {0} LU',
item.r128_album_gain) item.r128_album_gain)
@ -1322,8 +1376,6 @@ class ReplayGainPlugin(BeetsPlugin):
self._log.info(u'Skipping album {0}', album) self._log.info(u'Skipping album {0}', album)
return return
self._log.info(u'analyzing {0}', album)
if (any([self.should_use_r128(item) for item in album.items()]) and not if (any([self.should_use_r128(item) for item in album.items()]) and not
all(([self.should_use_r128(item) for item in album.items()]))): all(([self.should_use_r128(item) for item in album.items()]))):
self._log.error( self._log.error(
@ -1331,6 +1383,8 @@ class ReplayGainPlugin(BeetsPlugin):
album) album)
return return
self._log.info(u'analyzing {0}', album)
tag_vals = self.tag_specific_values(album.items()) tag_vals = self.tag_specific_values(album.items())
store_track_gain, store_album_gain, target_level, peak = tag_vals store_track_gain, store_album_gain, target_level, peak = tag_vals
@ -1344,21 +1398,35 @@ class ReplayGainPlugin(BeetsPlugin):
discs[1] = album.items() discs[1] = album.items()
for discnumber, items in discs.items(): for discnumber, items in discs.items():
try: def _store_album(album_gain):
album_gain = self.backend_instance.compute_album_gain( if not album_gain or not album_gain.album_gain \
items, target_level, peak or len(album_gain.track_gains) != len(items):
) # In some cases, backends fail to produce a valid
if len(album_gain.track_gains) != len(items): # `album_gain` without throwing FatalReplayGainError
# => raise non-fatal exception & continue
raise ReplayGainError( raise ReplayGainError(
u"ReplayGain backend failed " u"ReplayGain backend `{}` failed "
u"for some tracks in album {0}".format(album) u"for some tracks in album {}"
.format(self.backend_name, album)
) )
for item, track_gain in zip(items,
for item, track_gain in zip(items, album_gain.track_gains): album_gain.track_gains):
store_track_gain(item, track_gain) store_track_gain(item, track_gain)
store_album_gain(item, album_gain.album_gain) store_album_gain(item, album_gain.album_gain)
if write: if write:
item.try_write() item.try_write()
self._log.debug(u'done analyzing {0}', item)
try:
self._apply(
self.backend_instance.compute_album_gain, args=(),
kwds={
"items": [i for i in items],
"target_level": target_level,
"peak": peak
},
callback=_store_album
)
except ReplayGainError as e: except ReplayGainError as e:
self._log.info(u"ReplayGain error: {0}", e) self._log.info(u"ReplayGain error: {0}", e)
except FatalReplayGainError as e: except FatalReplayGainError as e:
@ -1376,28 +1444,121 @@ class ReplayGainPlugin(BeetsPlugin):
self._log.info(u'Skipping track {0}', item) self._log.info(u'Skipping track {0}', item)
return return
self._log.info(u'analyzing {0}', item)
tag_vals = self.tag_specific_values([item]) tag_vals = self.tag_specific_values([item])
store_track_gain, store_album_gain, target_level, peak = tag_vals store_track_gain, store_album_gain, target_level, peak = tag_vals
try: def _store_track(track_gains):
track_gains = self.backend_instance.compute_track_gain( if not track_gains or len(track_gains) != 1:
[item], target_level, peak # In some cases, backends fail to produce a valid
) # `track_gains` without throwing FatalReplayGainError
if len(track_gains) != 1: # => raise non-fatal exception & continue
raise ReplayGainError( raise ReplayGainError(
u"ReplayGain backend failed for track {0}".format(item) u"ReplayGain backend `{}` failed for track {}"
.format(self.backend_name, item)
) )
store_track_gain(item, track_gains[0]) store_track_gain(item, track_gains[0])
if write: if write:
item.try_write() item.try_write()
self._log.debug(u'done analyzing {0}', item)
try:
self._apply(
self.backend_instance.compute_track_gain, args=(),
kwds={
"items": [item],
"target_level": target_level,
"peak": peak,
},
callback=_store_track
)
except ReplayGainError as e: except ReplayGainError as e:
self._log.info(u"ReplayGain error: {0}", e) self._log.info(u"ReplayGain error: {0}", e)
except FatalReplayGainError as e: except FatalReplayGainError as e:
raise ui.UserError( raise ui.UserError(u"Fatal replay gain error: {0}".format(e))
u"Fatal replay gain error: {0}".format(e))
def _has_pool(self):
"""Check whether a `ThreadPool` is running instance in `self.pool`
"""
if hasattr(self, 'pool'):
if isinstance(self.pool, ThreadPool) and self.pool._state == RUN:
return True
return False
def open_pool(self, threads):
"""Open a `ThreadPool` instance in `self.pool`
"""
if not self._has_pool() and self.backend_instance.do_parallel:
self.pool = ThreadPool(threads)
self.exc_queue = queue.Queue()
signal.signal(signal.SIGINT, self._interrupt)
self.exc_watcher = ExceptionWatcher(
self.exc_queue, # threads push exceptions here
self.terminate_pool # abort once an exception occurs
)
self.exc_watcher.start()
def _apply(self, func, args, kwds, callback):
if self._has_pool():
def catch_exc(func, exc_queue, log):
"""Wrapper to catch raised exceptions in threads
"""
def wfunc(*args, **kwargs):
try:
return func(*args, **kwargs)
except ReplayGainError as e:
log.info(e.args[0]) # log non-fatal exceptions
except Exception:
exc_queue.put(sys.exc_info())
return wfunc
# Wrap function and callback to catch exceptions
func = catch_exc(func, self.exc_queue, self._log)
callback = catch_exc(callback, self.exc_queue, self._log)
self.pool.apply_async(func, args, kwds, callback)
else:
callback(func(*args, **kwds))
def terminate_pool(self):
"""Terminate the `ThreadPool` instance in `self.pool`
(e.g. stop execution in case of exception)
"""
# Don't call self._has_pool() here,
# self.pool._state may not be == RUN
if hasattr(self, 'pool') and isinstance(self.pool, ThreadPool):
self.pool.terminate()
self.pool.join()
# self.exc_watcher.join()
def _interrupt(self, signal, frame):
try:
self._log.info('interrupted')
self.terminate_pool()
exit(0)
except SystemExit:
# Silence raised SystemExit ~ exit(0)
pass
def close_pool(self):
"""Close the `ThreadPool` instance in `self.pool` (if there is one)
"""
if self._has_pool():
self.pool.close()
self.pool.join()
self.exc_watcher.join()
def import_begin(self, session):
"""Handle `import_begin` event -> open pool
"""
self.open_pool(self.config['threads'].get(int))
def import_end(self, paths):
"""Handle `import` event -> close pool
"""
self.close_pool()
def imported(self, session, task): def imported(self, session, task):
"""Add replay gain info to items or albums of ``task``. """Add replay gain info to items or albums of ``task``.
@ -1411,19 +1572,44 @@ class ReplayGainPlugin(BeetsPlugin):
"""Return the "replaygain" ui subcommand. """Return the "replaygain" ui subcommand.
""" """
def func(lib, opts, args): def func(lib, opts, args):
write = ui.should_write(opts.write) try:
force = opts.force write = ui.should_write(opts.write)
force = opts.force
if opts.album: # Bypass self.open_pool() if called with `--threads 0`
for album in lib.albums(ui.decargs(args)): if opts.threads != 0:
self.handle_album(album, write, force) threads = opts.threads or self.config['threads'].get(int)
self.open_pool(threads)
else: if opts.album:
for item in lib.items(ui.decargs(args)): albums = lib.albums(ui.decargs(args))
self.handle_track(item, write, force) self._log.info(
"Analyzing {} albums ~ {} backend..."
.format(len(albums), self.backend_name)
)
for album in albums:
self.handle_album(album, write, force)
else:
items = lib.items(ui.decargs(args))
self._log.info(
"Analyzing {} tracks ~ {} backend..."
.format(len(items), self.backend_name)
)
for item in items:
self.handle_track(item, write, force)
self.close_pool()
except (SystemExit, KeyboardInterrupt):
# Silence interrupt exceptions
pass
cmd = ui.Subcommand('replaygain', help=u'analyze for ReplayGain') cmd = ui.Subcommand('replaygain', help=u'analyze for ReplayGain')
cmd.parser.add_album_option() cmd.parser.add_album_option()
cmd.parser.add_option(
"-t", "--threads", dest="threads", type=int,
help=u'change the number of threads, \
defaults to maximum available processors'
)
cmd.parser.add_option( cmd.parser.add_option(
"-f", "--force", dest="force", action="store_true", default=False, "-f", "--force", dest="force", action="store_true", default=False,
help=u"analyze all files, including those that " help=u"analyze all files, including those that "

View file

@ -169,6 +169,9 @@ New features:
https://github.com/alastair/python-musicbrainzngs/pull/247 and https://github.com/alastair/python-musicbrainzngs/pull/247 and
https://github.com/alastair/python-musicbrainzngs/pull/266 . https://github.com/alastair/python-musicbrainzngs/pull/266 .
Thanks to :user:`aereaux`. Thanks to :user:`aereaux`.
* :doc:`/plugins/replaygain` now does its analysis in parallel when using
the ``command``, ``ffmpeg`` or ``bs1770gain`` backends.
:bug:`3478`
Fixes: Fixes:

View file

@ -13,12 +13,16 @@ Installation
This plugin can use one of many backends to compute the ReplayGain values: This plugin can use one of many backends to compute the ReplayGain values:
GStreamer, mp3gain (and its cousin, aacgain), Python Audio Tools or ffmpeg. GStreamer, mp3gain (and its cousin, aacgain), Python Audio Tools or ffmpeg.
ffmpeg and mp3gain can be easier to install. mp3gain supports less audio formats ffmpeg and mp3gain can be easier to install. mp3gain supports less audio formats
then the other backend. than the other backend.
Once installed, this plugin analyzes all files during the import process. This Once installed, this plugin analyzes all files during the import process. This
can be a slow process; to instead analyze after the fact, disable automatic can be a slow process; to instead analyze after the fact, disable automatic
analysis and use the ``beet replaygain`` command (see below). analysis and use the ``beet replaygain`` command (see below).
To speed up analysis with some of the available backends, this plugin processes
tracks or albums (when using the ``-a`` option) in parallel. By default,
a single thread is used per logical core of your CPU.
GStreamer GStreamer
````````` `````````
@ -35,6 +39,8 @@ the GStreamer backend by adding this to your configuration file::
replaygain: replaygain:
backend: gstreamer backend: gstreamer
The GStreamer backend does not support parallel analysis.
mp3gain and aacgain mp3gain and aacgain
``````````````````` ```````````````````
@ -73,6 +79,8 @@ On OS X, most of the dependencies can be installed with `Homebrew`_::
brew install mpg123 mp3gain vorbisgain faad2 libvorbis brew install mpg123 mp3gain vorbisgain faad2 libvorbis
The Python Audio Tools backend does not support parallel analysis.
.. _Python Audio Tools: http://audiotools.sourceforge.net .. _Python Audio Tools: http://audiotools.sourceforge.net
ffmpeg ffmpeg
@ -92,6 +100,9 @@ configuration file. The available options are:
- **auto**: Enable ReplayGain analysis during import. - **auto**: Enable ReplayGain analysis during import.
Default: ``yes``. Default: ``yes``.
- **threads**: The number of parallel threads to run the analysis in. Overridden
by ``--threads`` at the command line.
Default: # of logical CPU cores
- **backend**: The analysis backend; either ``gstreamer``, ``command``, ``audiotools`` - **backend**: The analysis backend; either ``gstreamer``, ``command``, ``audiotools``
or ``ffmpeg``. or ``ffmpeg``.
Default: ``command``. Default: ``command``.
@ -143,8 +154,15 @@ whether ReplayGain tags are written into the music files, or stored in the
beets database only (the default is to use :ref:`the importer's configuration beets database only (the default is to use :ref:`the importer's configuration
<config-import-write>`). <config-import-write>`).
To execute with a different number of threads, call ``beet replaygain --threads N``::
$ beet replaygain --threads N [-Waf] [QUERY]
with N any integer. To disable parallelism, use ``--threads 0``.
ReplayGain analysis is not fast, so you may want to disable it during import. ReplayGain analysis is not fast, so you may want to disable it during import.
Use the ``auto`` config option to control this:: Use the ``auto`` config option to control this::
replaygain: replaygain:
auto: no auto: no

View file

@ -22,11 +22,15 @@ import six
from mock import patch from mock import patch
from test.helper import TestHelper, capture_log, has_program from test.helper import TestHelper, capture_log, has_program
from sqlite3 import OperationalError
from beets import config from beets import config
from beets.util import CommandOutput from beets.util import CommandOutput
from mediafile import MediaFile from mediafile import MediaFile
from beetsplug.replaygain import (FatalGstreamerPluginReplayGainError, from beetsplug.replaygain import (FatalGstreamerPluginReplayGainError,
GStreamerBackend) GStreamerBackend,
ReplayGainPlugin)
try: try:
import gi import gi
@ -55,10 +59,28 @@ def reset_replaygain(item):
item['rg_album_gain'] = None item['rg_album_gain'] = None
item.write() item.write()
item.store() item.store()
item.store()
item.store()
def _store_retry_once(self, item):
"""Helper method to retry item.store() once in case
of a sqlite3.OperationalError exception.
:param self: `ReplayGainPlugin` instance
:param item: a library item to store
"""
try:
item.store()
except OperationalError:
# test_replaygain.py :memory: library can fail with
# `sqlite3.OperationalError: no such table: items`
# but the second attempt succeeds
item.store()
@patch.object(ReplayGainPlugin, '_store', _store_retry_once)
class ReplayGainCliTestBase(TestHelper): class ReplayGainCliTestBase(TestHelper):
def setUp(self): def setUp(self):
self.setup_beets() self.setup_beets()
self.config['replaygain']['backend'] = self.backend self.config['replaygain']['backend'] = self.backend