Fix fragile parsing of bs1770gain output

bs1770gain sorts the filenames internally, so in some corner cases it returns the results in a different order from the one in which the files were given on the command line.

This patch uses the --xml option of bs1770gain (introduced in 0.4.6) in order to read the filenames reliably.
This commit is contained in:
Yann Leprince 2017-12-31 11:27:42 +01:00
parent 28cb79b877
commit 05775ccac6

View file

@ -20,12 +20,13 @@ import os
import collections
import sys
import warnings
import re
import xml.parsers.expat
from six.moves import zip
from beets import ui
from beets.plugins import BeetsPlugin
from beets.util import syspath, command_output, displayable_path, py3_path
from beets.util import (syspath, command_output, bytestring_path,
displayable_path, py3_path)
# Utilities.
@ -194,12 +195,13 @@ class Bs1770gainBackend(Backend):
# Construct shell command.
cmd = [self.command]
cmd += [self.method]
cmd += ['-p']
cmd += ['--xml', '-p']
# Workaround for Windows: the underlying tool fails on paths
# with the \\?\ prefix, so we don't use it here. This
# prevents the backend from working with long paths.
args = cmd + [syspath(i.path, prefix=False) for i in items]
path_list = [i.path for i in items]
# Invoke the command.
self._log.debug(
@ -208,40 +210,63 @@ class Bs1770gainBackend(Backend):
output = call(args)
self._log.debug(u'analysis finished: {0}', output)
results = self.parse_tool_output(output,
len(items) + is_album)
results = self.parse_tool_output(output, path_list, is_album)
self._log.debug(u'{0} items, {1} results', len(items), len(results))
return results
def parse_tool_output(self, text, num_lines):
def parse_tool_output(self, text, path_list, is_album):
"""Given the output from bs1770gain, parse the text and
return a list of Gain objects: one per entry of `path_list`, in the
same order, followed by the album gain when `is_album` is true.
"""
out = []
data = text.decode('utf-8', errors='ignore')
regex = re.compile(
u'(\\s{2,2}\\[\\d+\\/\\d+\\].*?|\\[ALBUM\\].*?)'
'(?=\\s{2,2}\\[\\d+\\/\\d+\\]|\\s{2,2}\\[ALBUM\\]'
':|done\\.\\s)', re.DOTALL | re.UNICODE)
results = re.findall(regex, data)
for parts in results[0:num_lines]:
part = parts.split(u'\n')
if len(part) == 0:
self._log.debug(u'bad tool output: {0!r}', text)
raise ReplayGainError(u'bs1770gain failed')
per_file_gain = {}
album_gain = {} # mutable variable so it can be set from handlers
parser = xml.parsers.expat.ParserCreate(encoding='utf-8')
state = {'file': None, 'gain': None, 'peak': None}
def start_element_handler(name, attrs):
    """Record the relevant attributes of an opening XML element.

    A `track` element stores its `file` attribute (converted with
    `bytestring_path`) in `state`; `integrated` and `sample-peak`
    elements store their `lu` and `factor` attributes as floats.
    Any other element is ignored.

    Raises ReplayGainError when a `track` names a file that already
    has an entry in `per_file_gain`.
    """
    if name == u'integrated':
        state['gain'] = float(attrs[u'lu'])
        return
    if name == u'sample-peak':
        state['peak'] = float(attrs[u'factor'])
        return
    if name != u'track':
        return

    track_file = bytestring_path(attrs[u'file'])
    state['file'] = track_file
    if track_file in per_file_gain:
        raise ReplayGainError(u'duplicate filename in bs1770gain output')
def end_element_handler(name):
    """Store the gain/peak pair gathered so far when an element closes.

    On a closing `track` the pair is saved in `per_file_gain` under
    the file name held in `state`; on a closing `summary` it is saved
    as the album result in `album_gain`. Any other element is ignored.

    Raises ReplayGainError when either the gain or the peak has not
    been set by the time a `track` or `summary` element closes.
    """
    if name not in (u'track', u'summary'):
        return

    gain, peak = state['gain'], state['peak']
    if gain is None or peak is None:
        raise ReplayGainError(u'could not parse gain or peak from '
                              'the output of bs1770gain')

    result = Gain(gain, peak)
    if name == u'track':
        per_file_gain[state['file']] = result
    else:
        album_gain["album"] = result
    # Clear the pair so the next track/summary has to supply its own values.
    state['gain'] = state['peak'] = None
parser.StartElementHandler = start_element_handler
parser.EndElementHandler = end_element_handler
parser.Parse(text, True)
try:
song = {
'file': part[0],
'gain': float((part[1].split('/'))[1].split('LU')[0]),
'peak': float(part[2].split('/')[1]),
}
except IndexError:
self._log.info(u'bs1770gain reports (faulty file?): {}', parts)
continue
if len(per_file_gain) != len(path_list):
raise ReplayGainError(
u'the number of results returned by bs1770gain does not match '
'the number of files passed to it')
out.append(Gain(song['gain'], song['peak']))
# bs1770gain does not return the analysis results in the order in which
# the files are passed on the command line, because it sorts the files
# internally. We must recover the order from the filenames themselves.
try:
out = [per_file_gain[os.path.basename(p)] for p in path_list]
except KeyError:
raise ReplayGainError(
u'unrecognized filename in bs1770gain output '
'(bs1770gain can only deal with utf-8 file names)')
if is_album:
out.append(album_gain["album"])
return out