mirror of
https://github.com/beetbox/beets.git
synced 2025-12-27 19:12:40 +01:00
Fix fragile parsing of bs1770gain output
bs1770gain sorts filenames internally, so in some corner cases it returns results in a different order from the one in which the files were given on the command line. The old parser matched results to files by position, which made it fragile. This patch uses bs1770gain's --xml option (introduced in version 0.4.6) so that each result can be matched to its file reliably by name.
This commit is contained in:
parent
28cb79b877
commit
05775ccac6
1 changed file with 53 additions and 28 deletions
|
|
@ -20,12 +20,13 @@ import os
|
|||
import collections
|
||||
import sys
|
||||
import warnings
|
||||
import re
|
||||
import xml.parsers.expat
|
||||
from six.moves import zip
|
||||
|
||||
from beets import ui
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets.util import syspath, command_output, displayable_path, py3_path
|
||||
from beets.util import (syspath, command_output, bytestring_path,
|
||||
displayable_path, py3_path)
|
||||
|
||||
|
||||
# Utilities.
|
||||
|
|
@ -194,12 +195,13 @@ class Bs1770gainBackend(Backend):
|
|||
# Construct shell command.
|
||||
cmd = [self.command]
|
||||
cmd += [self.method]
|
||||
cmd += ['-p']
|
||||
cmd += ['--xml', '-p']
|
||||
|
||||
# Workaround for Windows: the underlying tool fails on paths
|
||||
# with the \\?\ prefix, so we don't use it here. This
|
||||
# prevents the backend from working with long paths.
|
||||
args = cmd + [syspath(i.path, prefix=False) for i in items]
|
||||
path_list = [i.path for i in items]
|
||||
|
||||
# Invoke the command.
|
||||
self._log.debug(
|
||||
|
|
@ -208,40 +210,63 @@ class Bs1770gainBackend(Backend):
|
|||
output = call(args)
|
||||
|
||||
self._log.debug(u'analysis finished: {0}', output)
|
||||
results = self.parse_tool_output(output,
|
||||
len(items) + is_album)
|
||||
results = self.parse_tool_output(output, path_list, is_album)
|
||||
self._log.debug(u'{0} items, {1} results', len(items), len(results))
|
||||
return results
|
||||
|
||||
def parse_tool_output(self, text, num_lines):
|
||||
def parse_tool_output(self, text, path_list, is_album):
|
||||
"""Given the output from bs1770gain, parse the text and
|
||||
return a list of dictionaries
|
||||
containing information about each analyzed file.
|
||||
"""
|
||||
out = []
|
||||
data = text.decode('utf-8', errors='ignore')
|
||||
regex = re.compile(
|
||||
u'(\\s{2,2}\\[\\d+\\/\\d+\\].*?|\\[ALBUM\\].*?)'
|
||||
'(?=\\s{2,2}\\[\\d+\\/\\d+\\]|\\s{2,2}\\[ALBUM\\]'
|
||||
':|done\\.\\s)', re.DOTALL | re.UNICODE)
|
||||
results = re.findall(regex, data)
|
||||
for parts in results[0:num_lines]:
|
||||
part = parts.split(u'\n')
|
||||
if len(part) == 0:
|
||||
self._log.debug(u'bad tool output: {0!r}', text)
|
||||
raise ReplayGainError(u'bs1770gain failed')
|
||||
per_file_gain = {}
|
||||
album_gain = {} # mutable variable so it can be set from handlers
|
||||
parser = xml.parsers.expat.ParserCreate(encoding='utf-8')
|
||||
state = {'file': None, 'gain': None, 'peak': None}
|
||||
def start_element_handler(name, attrs):
|
||||
if name == u'track':
|
||||
state['file'] = bytestring_path(attrs[u'file'])
|
||||
if state['file'] in per_file_gain:
|
||||
raise ReplayGainError(
|
||||
u'duplicate filename in bs1770gain output')
|
||||
elif name == u'integrated':
|
||||
state['gain'] = float(attrs[u'lu'])
|
||||
elif name == u'sample-peak':
|
||||
state['peak'] = float(attrs[u'factor'])
|
||||
def end_element_handler(name):
|
||||
if name == u'track':
|
||||
if state['gain'] is None or state['peak'] is None:
|
||||
raise ReplayGainError(u'could not parse gain or peak from '
|
||||
'the output of bs1770gain')
|
||||
per_file_gain[state['file']] = Gain(state['gain'],
|
||||
state['peak'])
|
||||
state['gain'] = state['peak'] = None
|
||||
elif name == u'summary':
|
||||
if state['gain'] is None or state['peak'] is None:
|
||||
raise ReplayGainError(u'could not parse gain or peak from '
|
||||
'the output of bs1770gain')
|
||||
album_gain["album"] = Gain(state['gain'], state['peak'])
|
||||
state['gain'] = state['peak'] = None
|
||||
parser.StartElementHandler = start_element_handler
|
||||
parser.EndElementHandler = end_element_handler
|
||||
parser.Parse(text, True)
|
||||
|
||||
try:
|
||||
song = {
|
||||
'file': part[0],
|
||||
'gain': float((part[1].split('/'))[1].split('LU')[0]),
|
||||
'peak': float(part[2].split('/')[1]),
|
||||
}
|
||||
except IndexError:
|
||||
self._log.info(u'bs1770gain reports (faulty file?): {}', parts)
|
||||
continue
|
||||
if len(per_file_gain) != len(path_list):
|
||||
raise ReplayGainError(
|
||||
u'the number of results returned by bs1770gain does not match '
|
||||
'the number of files passed to it')
|
||||
|
||||
out.append(Gain(song['gain'], song['peak']))
|
||||
# bs1770gain does not return the analysis results in the order that
|
||||
# files are passed on the command line, because it is sorting the files
|
||||
# internally. We must recover the order from the filenames themselves.
|
||||
try:
|
||||
out = [per_file_gain[os.path.basename(p)] for p in path_list]
|
||||
except KeyError:
|
||||
raise ReplayGainError(
|
||||
u'unrecognized filename in bs1770gain output '
|
||||
'(bs1770gain can only deal with utf-8 file names)')
|
||||
if is_album:
|
||||
out.append(album_gain["album"])
|
||||
return out
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue