diff --git a/beetsplug/replaygain.py b/beetsplug/replaygain.py
index a70f9384b..d8e0e484a 100644
--- a/beetsplug/replaygain.py
+++ b/beetsplug/replaygain.py
@@ -20,12 +20,13 @@ import os
 import collections
 import sys
 import warnings
-import re
+import xml.parsers.expat
 from six.moves import zip
 
 from beets import ui
 from beets.plugins import BeetsPlugin
-from beets.util import syspath, command_output, displayable_path, py3_path
+from beets.util import (syspath, command_output, bytestring_path,
+                        displayable_path, py3_path)
 
 
 # Utilities.
@@ -194,12 +195,13 @@ class Bs1770gainBackend(Backend):
         # Construct shell command.
         cmd = [self.command]
         cmd += [self.method]
-        cmd += ['-p']
+        cmd += ['--xml', '-p']
 
         # Workaround for Windows: the underlying tool fails on paths
         # with the \\?\ prefix, so we don't use it here. This
         # prevents the backend from working with long paths.
         args = cmd + [syspath(i.path, prefix=False) for i in items]
+        path_list = [i.path for i in items]
 
         # Invoke the command.
         self._log.debug(
@@ -208,40 +210,97 @@ class Bs1770gainBackend(Backend):
         output = call(args)
 
         self._log.debug(u'analysis finished: {0}', output)
-        results = self.parse_tool_output(output,
-                                         len(items) + is_album)
+        results = self.parse_tool_output(output, path_list, is_album)
         self._log.debug(u'{0} items, {1} results', len(items), len(results))
         return results
 
-    def parse_tool_output(self, text, num_lines):
-        """Given the output from bs1770gain, parse the text and
-        return a list of dictionaries
-        containing information about each analyzed file.
-        """
-        out = []
-        data = text.decode('utf-8', errors='ignore')
-        regex = re.compile(
-            u'(\\s{2,2}\\[\\d+\\/\\d+\\].*?|\\[ALBUM\\].*?)'
-            '(?=\\s{2,2}\\[\\d+\\/\\d+\\]|\\s{2,2}\\[ALBUM\\]'
-            ':|done\\.\\s)', re.DOTALL | re.UNICODE)
-        results = re.findall(regex, data)
-        for parts in results[0:num_lines]:
-            part = parts.split(u'\n')
-            if len(part) == 0:
-                self._log.debug(u'bad tool output: {0!r}', text)
-                raise ReplayGainError(u'bs1770gain failed')
+    def parse_tool_output(self, text, path_list, is_album):
+        """Parse the XML report printed by bs1770gain.
+
+        `text` is the raw output of the tool, `path_list` contains the
+        paths of the analyzed files in the order in which the results
+        are wanted, and `is_album` tells whether the album-level result
+        must be appended.
+
+        Return a list with one `Gain` per entry of `path_list`, in the
+        same order, followed by the album `Gain` if `is_album` is set.
+        Raise `ReplayGainError` if the output is malformed, incomplete
+        or does not match `path_list`.
+        """
+        per_file_gain = {}
+        album_gain = {}  # mutable variable so it can be set from handlers
+        parser = xml.parsers.expat.ParserCreate(encoding='utf-8')
+        state = {'file': None, 'gain': None, 'peak': None}
+
+        def float_attr(attrs, key):
+            # A missing or non-numeric attribute must surface as a
+            # ReplayGainError rather than a bare KeyError/ValueError.
+            try:
+                return float(attrs[key])
+            except (KeyError, ValueError):
+                raise ReplayGainError(
+                    u'could not parse gain or peak from '
+                    'the output of bs1770gain')
+
+        def pop_gain():
+            # Turn the values collected for the element being closed
+            # into a Gain and reset them for the next element.
+            if state['gain'] is None or state['peak'] is None:
+                raise ReplayGainError(u'could not parse gain or peak from '
+                                      'the output of bs1770gain')
+            gain = Gain(state['gain'], state['peak'])
+            state['gain'] = state['peak'] = None
+            return gain
+
+        def start_element_handler(name, attrs):
+            if name == u'track':
+                if u'file' not in attrs:
+                    raise ReplayGainError(
+                        u'missing filename in bs1770gain output')
+                state['file'] = bytestring_path(attrs[u'file'])
+                if state['file'] in per_file_gain:
+                    raise ReplayGainError(
+                        u'duplicate filename in bs1770gain output')
+            elif name == u'integrated':
+                state['gain'] = float_attr(attrs, u'lu')
+            elif name == u'sample-peak':
+                state['peak'] = float_attr(attrs, u'factor')
+
+        def end_element_handler(name):
+            if name == u'track':
+                per_file_gain[state['file']] = pop_gain()
+            elif name == u'summary':
+                album_gain["album"] = pop_gain()
+
+        parser.StartElementHandler = start_element_handler
+        parser.EndElementHandler = end_element_handler
+        # Errors raised by the handlers propagate through Parse(); only
+        # XML syntax errors have to be translated here.
+        try:
+            parser.Parse(text, True)
+        except xml.parsers.expat.ExpatError:
+            raise ReplayGainError(
+                u'bs1770gain produced malformed XML output')
 
-            try:
-                song = {
-                    'file': part[0],
-                    'gain': float((part[1].split('/'))[1].split('LU')[0]),
-                    'peak': float(part[2].split('/')[1]),
-                }
-            except IndexError:
-                self._log.info(u'bs1770gain reports (faulty file?): {}', parts)
-                continue
+        if len(per_file_gain) != len(path_list):
+            raise ReplayGainError(
+                u'the number of results returned by bs1770gain does not match '
+                'the number of files passed to it')
 
-            out.append(Gain(song['gain'], song['peak']))
+        # bs1770gain does not return the analysis results in the order that
+        # files are passed on the command line, because it is sorting the files
+        # internally. We must recover the order from the filenames themselves.
+        try:
+            out = [per_file_gain[os.path.basename(p)] for p in path_list]
+        except KeyError:
+            raise ReplayGainError(
+                u'unrecognized filename in bs1770gain output '
+                '(bs1770gain can only deal with utf-8 file names)')
+        if is_album:
+            if "album" not in album_gain:
+                raise ReplayGainError(
+                    u'missing album summary in bs1770gain output')
+            out.append(album_gain["album"])
         return out
 
 