# This file is part of beets.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Provides the basic, interface-agnostic workflow for importing and
autotagging music files.
"""
from __future__ import print_function

import os
import logging
import pickle
import itertools
from collections import defaultdict
from tempfile import mkdtemp
import shutil

from beets import autotag
from beets import library
from beets import dbcore
from beets import plugins
from beets import util
from beets import config
from beets.util import pipeline
from beets.util import syspath, normpath, displayable_path
from enum import Enum
from beets import mediafile

action = Enum('action',
              ['SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID',
               'ALBUMS'])

QUEUE_SIZE = 128
SINGLE_ARTIST_THRESH = 0.25
VARIOUS_ARTISTS = u'Various Artists'
PROGRESS_KEY = 'tagprogress'
HISTORY_KEY = 'taghistory'

# Global logger.
log = logging.getLogger('beets')


class ImportAbort(Exception):
    """Raised when the user aborts the tagging operation.
    """
    pass


# Utilities.

def _open_state():
    """Reads the state file, returning a dictionary."""
    try:
        with open(config['statefile'].as_filename()) as f:
            return pickle.load(f)
    except (IOError, EOFError):
        return {}


def _save_state(state):
    """Writes the state dictionary out to disk."""
    try:
        with open(config['statefile'].as_filename(), 'w') as f:
            pickle.dump(state, f)
    except IOError as exc:
        log.error(u'state file could not be written: %s' % unicode(exc))


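# A sketch of the state dictionary's layout (derived from progress_set()
# and history_add() below; the concrete paths are illustrative):
#
#     {
#         'tagprogress': {'/music/inbox': ['/music/inbox/An Album']},
#         'taghistory': set([('/music/inbox/An Album',)]),
#     }
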
# Utilities for reading and writing the beets progress file, which
# allows long tagging tasks to be resumed when they pause (or crash).

def progress_set(toppath, paths):
    """Record that tagging for the given `toppath` was successful up to
    `paths`. If paths is None, then clear the progress value (indicating
    that the tagging completed).
    """
    state = _open_state()
    if PROGRESS_KEY not in state:
        state[PROGRESS_KEY] = {}

    if paths is None:
        # Remove progress from file.
        if toppath in state[PROGRESS_KEY]:
            del state[PROGRESS_KEY][toppath]
    else:
        state[PROGRESS_KEY][toppath] = paths

    _save_state(state)


def progress_get(toppath):
    """Get the last successfully tagged subpath of toppath. If toppath
    has no progress information, returns None.
    """
    state = _open_state()
    if PROGRESS_KEY not in state:
        return None
    return state[PROGRESS_KEY].get(toppath)


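# A minimal sketch (not a beets API, just these helpers in sequence) of the
# resume cycle that read_tasks() below implements:
#
#     resume_dir = progress_get(toppath)  # last album tagged successfully
#     # ... fast-forward past everything up to and including resume_dir ...
#     progress_set(toppath, None)         # tagging finished; clear progress
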
# Similarly, utilities for manipulating the "incremental" import log.
# This keeps track of all directories that were ever imported, which
# allows the importer to only import new stuff.

def history_add(paths):
    """Indicate that the import of the album in `paths` is completed and
    should not be repeated in incremental imports.
    """
    state = _open_state()
    if HISTORY_KEY not in state:
        state[HISTORY_KEY] = set()

    state[HISTORY_KEY].add(tuple(paths))

    _save_state(state)


def history_get():
    """Get the set of completed path tuples in incremental imports.
    """
    state = _open_state()
    if HISTORY_KEY not in state:
        return set()
    return state[HISTORY_KEY]


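# A small sketch of the incremental check performed in read_tasks() below:
# an album directory is skipped when its path tuple is already recorded.
#
#     if tuple(paths) in history_get():
#         pass  # already imported; skip
#     else:
#         history_add(paths)  # remember it for the next run
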
# Abstract session class.

class ImportSession(object):
    """Controls an import action. Subclasses should implement methods to
    communicate with the user or otherwise make decisions.
    """
    def __init__(self, lib, logfile, paths, query):
        """Create a session. `lib` is a Library object. `logfile` is a
        file-like object open for writing or None if no logging is to be
        performed. Either `paths` or `query` is non-null and indicates
        the source of files to be imported.
        """
        self.lib = lib
        self.logfile = logfile
        self.paths = paths
        self.query = query

        # Normalize the paths.
        if self.paths:
            self.paths = map(normpath, self.paths)

    def set_config(self, config):
        """Set `config` property from global import config and make
        implied changes.
        """
        # FIXME: Maybe this function should not exist and should instead
        # provide "decision wrappers" like "should_resume()", etc.
        iconfig = dict(config)
        self.config = iconfig

        # Incremental and progress are mutually exclusive.
        if iconfig['incremental']:
            iconfig['resume'] = False

        # When based on a query instead of directories, never
        # save progress or try to resume.
        if self.query is not None:
            iconfig['resume'] = False
            iconfig['incremental'] = False

        # Copy and move are mutually exclusive.
        if iconfig['move']:
            iconfig['copy'] = False

        # Only delete when copying.
        if not iconfig['copy']:
            iconfig['delete'] = False

        self.want_resume = config['resume'].as_choice([True, False, 'ask'])

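    # To illustrate the resolution above (hypothetical values, for
    # illustration only): starting from {'move': True, 'copy': True,
    # 'delete': True, 'incremental': True, 'resume': True}, the session
    # ends up with copy=False (move wins), delete=False (delete requires
    # copy), and resume=False (incremental imports never resume).
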
    def tag_log(self, status, paths):
        """Log a message about a given album to logfile. The status should
        reflect the reason the album couldn't be tagged.
        """
        if self.logfile:
            print(u'{0} {1}'.format(status, displayable_path(paths)),
                  file=self.logfile)
            self.logfile.flush()

    def log_choice(self, task, duplicate=False):
        """Logs the task's current choice if it should be logged. If
        ``duplicate``, then this is a secondary choice after a duplicate was
        detected and a decision was made.
        """
        paths = task.paths
        if duplicate:
            # Duplicate: log all three choices (skip, keep both, and trump).
            if task.remove_duplicates:
                self.tag_log('duplicate-replace', paths)
            elif task.choice_flag in (action.ASIS, action.APPLY):
                self.tag_log('duplicate-keep', paths)
            elif task.choice_flag is action.SKIP:
                self.tag_log('duplicate-skip', paths)
        else:
            # Non-duplicate: log "skip" and "asis" choices.
            if task.choice_flag is action.ASIS:
                self.tag_log('asis', paths)
            elif task.choice_flag is action.SKIP:
                self.tag_log('skip', paths)

    def should_resume(self, path):
        raise NotImplementedError

    def choose_match(self, task):
        raise NotImplementedError

    def resolve_duplicate(self, task):
        raise NotImplementedError

    def choose_item(self, task):
        raise NotImplementedError

    def run(self):
        """Run the import task.
        """
        self.set_config(config['import'])

        # Set up the pipeline.
        if self.query is None:
            stages = [read_tasks(self)]
        else:
            stages = [query_tasks(self)]

        if self.config['group_albums'] and \
           not self.config['singletons']:
            # Split directory tasks into one task for each album.
            stages += [group_albums(self)]
        if self.config['autotag']:
            # Only look up and query the user when autotagging.

            # FIXME We should also resolve duplicates when not
            # autotagging.
            # FIXME user_query and resolve_duplicates use the UI.
            # Running them in parallel in different stages might mess up
            # the output.
            stages += [lookup_candidates(self), user_query(self),
                       resolve_duplicates(self)]
        else:
            stages += [import_asis(self)]
        stages += [apply_choices(self)]
        for stage_func in plugins.import_stages():
            stages.append(plugin_stage(self, stage_func))
        stages += [manipulate_files(self)]
        stages += [finalize(self)]
        pl = pipeline.Pipeline(stages)

        # Run the pipeline.
        try:
            if config['threaded']:
                pl.run_parallel(QUEUE_SIZE)
            else:
                pl.run_sequential()
        except ImportAbort:
            # User aborted operation. Silently stop.
            pass


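# A minimal sketch of a concrete session (hypothetical class; beets' own
# interactive implementation lives in the UI layer). A subclass only has
# to answer the questions the pipeline asks:
#
#     class QuietImportSession(ImportSession):
#         def should_resume(self, path):
#             return True  # always resume interrupted imports
#
#         def choose_match(self, task):
#             return action.ASIS  # keep the files' current tags
#
#         def choose_item(self, task):
#             return action.ASIS
#
#         def resolve_duplicate(self, task):
#             pass  # keep both the old and the new album
#
#     QuietImportSession(lib, None, ['/music/inbox'], None).run()
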
# The importer task class.

class ImportTask(object):
    """Represents a single set of items to be imported along with its
    intermediate state. May represent an album or a single item.
    """
    def __init__(self, toppath=None, paths=None, items=None):
        self.toppath = toppath
        self.paths = paths
        self.items = items
        self.choice_flag = None
        # TODO remove this eventually
        self.remove_duplicates = False
        self.is_album = True

    def set_null_candidates(self):
        """Set the candidates to indicate no album match was found.
        """
        self.cur_artist = None
        self.cur_album = None
        self.candidates = None
        self.rec = None

    def set_choice(self, choice):
        """Given an AlbumMatch or TrackMatch object or an action constant,
        indicates that an action has been selected for this task.
        """
        # Not part of the task structure:
        assert choice not in (action.MANUAL, action.MANUAL_ID)
        assert choice != action.APPLY  # Only used internally.
        if choice in (action.SKIP, action.ASIS, action.TRACKS, action.ALBUMS):
            self.choice_flag = choice
            self.match = None
        else:
            self.choice_flag = action.APPLY  # Implicit choice.
            self.match = choice

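    # Illustrative only: a caller passes either an action constant or a
    # match object, and set_choice() records the decision.
    #
    #     task.set_choice(action.ASIS)    # keep current tags; match is None
    #     task.set_choice(some_match)     # an AlbumMatch/TrackMatch;
    #                                     # choice_flag becomes action.APPLY
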
    def save_progress(self):
        """Updates the progress state to indicate that this album has
        finished.
        """
        progress_set(self.toppath, self.paths)

    def save_history(self):
        """Save the directory in the history for incremental imports.
        """
        if self.paths:
            history_add(self.paths)

    # Logical decisions.

    @property
    def apply(self):
        return self.choice_flag == action.APPLY

    @property
    def skip(self):
        return self.choice_flag == action.SKIP

    # Convenient data.

    def chosen_ident(self):
        """Returns identifying metadata about the current choice. For
        albums, this is an (artist, album) pair. For items, this is
        (artist, title). May only be called when the choice flag is ASIS
        (in which case the data comes from the files' current metadata)
        or APPLY (data comes from the choice).
        """
        if self.choice_flag is action.ASIS:
            return (self.cur_artist, self.cur_album)
        elif self.choice_flag is action.APPLY:
            return (self.match.info.artist, self.match.info.album)

    def imported_items(self):
        """Return a list of Items that should be added to the library.
        If this is an album task, return the list of items in the
        selected match or everything if the choice is ASIS. If this is a
        singleton task, return a list containing the item.
        """
        if self.choice_flag == action.ASIS:
            return list(self.items)
        elif self.choice_flag == action.APPLY:
            return self.match.mapping.keys()
        else:
            assert False

    def apply_metadata(self):
        """Copy metadata from match info to the items.
        """
        autotag.apply_metadata(self.match.info, self.match.mapping)

    def duplicate_items(self, lib):
        duplicate_items = []
        for album in self.find_duplicates(lib):
            duplicate_items += album.items()
        return duplicate_items

    def do_remove_duplicates(self, lib):
        # TODO: Bad name. Resolve naming conflict.
        duplicate_items = self.duplicate_items(lib)
        log.debug('removing %i old duplicated items' %
                  len(duplicate_items))
        for item in duplicate_items:
            item.remove()
            if lib.directory in util.ancestry(item.path):
                log.debug(u'deleting duplicate %s' %
                          util.displayable_path(item.path))
                util.remove(item.path)
                util.prune_dirs(os.path.dirname(item.path),
                                lib.directory)

    def finalize(self, session):
        """Save progress, clean up files, and emit the plugin event.
        """
        # FIXME the session argument is unfortunate. It should be
        # present as an attribute of the task.

        # Update progress.
        if session.want_resume:
            self.save_progress()
        if session.config['incremental']:
            self.save_history()

        self.cleanup(copy=session.config['copy'],
                     delete=session.config['delete'],
                     move=session.config['move'])
        self._emit_imported(session.lib)

    def cleanup(self, copy=False, delete=False, move=False):
        """Remove and prune imported paths.
        """
        # FIXME Maybe the keywords should be task properties.

        # FIXME This shouldn't be here. Skipping should be handled in
        # the stages.
        if self.skip:
            return
        items = self.imported_items()

        # When copying and deleting originals, delete old files.
        if copy and delete:
            new_paths = [os.path.realpath(item.path) for item in items]
            for old_path in self.old_paths:
                # Only delete files that were actually copied.
                if old_path not in new_paths:
                    util.remove(syspath(old_path), False)
                    self.prune(old_path)

        # When moving, prune empty directories containing the original files.
        elif move:
            for old_path in self.old_paths:
                self.prune(old_path)

    def _emit_imported(self, lib):
        # FIXME This shouldn't be here. Skipping should be handled in
        # the stages.
        if self.skip:
            return
        plugins.send('album_imported', lib=lib, album=self.album)

    def lookup_candidates(self):
        """Retrieve and store candidates for this album.
        """
        artist, album, candidates, recommendation = \
            autotag.tag_album(self.items)
        self.cur_artist = artist
        self.cur_album = album
        self.candidates = candidates
        self.rec = recommendation

    def find_duplicates(self, lib):
        """Return a list of albums from `lib` with the same artist and
        album name as the task.
        """
        artist, album = self.chosen_ident()

        if artist is None:
            # As-is import with no artist. Skip check.
            return []

        duplicates = []
        task_paths = set(i.path for i in self.items if i)
        duplicate_query = dbcore.AndQuery((
            dbcore.MatchQuery('albumartist', artist),
            dbcore.MatchQuery('album', album),
        ))

        for album in lib.albums(duplicate_query):
            # Check whether the album is identical in contents, in which
            # case it is not a duplicate (will be replaced).
            album_paths = set(i.path for i in album.items())
            if album_paths != task_paths:
                duplicates.append(album)
        return duplicates

    def infer_album_fields(self):
        """Make some album fields equal across `self.items`.
        """
        changes = {}

        if self.choice_flag == action.ASIS:
            # Taking metadata "as-is". Guess whether this album is VA.
            plur_albumartist, freq = util.plurality(
                [i.albumartist or i.artist for i in self.items]
            )
            if freq == len(self.items) or \
               (freq > 1 and
                float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH):
                # Single-artist album.
                changes['albumartist'] = plur_albumartist
                changes['comp'] = False
            else:
                # VA.
                changes['albumartist'] = VARIOUS_ARTISTS
                changes['comp'] = True

        elif self.choice_flag == action.APPLY:
            # Applying autotagged metadata. Just get AA from the first
            # item.
            # FIXME this is overly complicated. Can we assume that
            # `self.items` contains only elements that are not None and
            # at least one of them?
            for item in self.items:
                if item is not None:
                    first_item = item
                    break
            else:
                assert False, "all items are None"
            if not first_item.albumartist:
                changes['albumartist'] = first_item.artist
            if not first_item.mb_albumartistid:
                changes['mb_albumartistid'] = first_item.mb_artistid

        # Apply new metadata.
        for item in self.items:
            if item is not None:
                item.update(changes)

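    # A worked example of the as-is heuristic above: with 10 items of which
    # 6 share an artist, freq/len = 0.6 >= SINGLE_ARTIST_THRESH (0.25), so
    # the album is kept single-artist. With 10 distinct artists, freq == 1
    # fails the `freq > 1` guard and the album becomes Various Artists.
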
    def manipulate_files(self, move=False, copy=False, write=False,
                         session=None):
        items = self.imported_items()
        # Save the original paths of all items for deletion and pruning
        # in the next step (finalization).
        self.old_paths = [item.path for item in items]
        for item in items:
            if session.config['move']:
                # Just move the file.
                item.move(False)
            elif session.config['copy']:
                # If it's a reimport, move in-library files and copy
                # out-of-library files. Otherwise, copy and keep track
                # of the old path.
                old_path = item.path
                if self.replaced_items[item]:
                    # This is a reimport. Move in-library files and copy
                    # out-of-library files.
                    if session.lib.directory in util.ancestry(old_path):
                        item.move(False)
                        # We moved the item, so remove the
                        # now-nonexistent file from old_paths.
                        self.old_paths.remove(old_path)
                    else:
                        item.move(True)
                else:
                    # A normal import. Just copy files and keep track of
                    # old paths.
                    item.move(True)

            if session.config['write'] and self.apply:
                item.try_write()

        with session.lib.transaction():
            for item in self.imported_items():
                item.store()

        plugins.send('import_task_files', session=session, task=self)

    def add(self, lib):
        """Add the items as an album to the library and remove replaced items.
        """
        with lib.transaction():
            self.remove_replaced(lib)
            self.album = lib.add_album(self.imported_items())

    def remove_replaced(self, lib):
        """Removes all the items from the library that have the same
        path as an item from this task.

        Records the replaced items in the `replaced_items` dictionary.
        """
        self.replaced_items = defaultdict(list)
        for item in self.imported_items():
            dup_items = lib.items(dbcore.query.BytesQuery('path', item.path))
            self.replaced_items[item] = dup_items
            for dup_item in dup_items:
                log.debug('replacing item %i: %s' %
                          (dup_item.id, displayable_path(item.path)))
                dup_item.remove()
        log.debug('%i of %i items replaced' % (len(self.replaced_items),
                                               len(self.imported_items())))

    def choose_match(self, session):
        """Ask the session which match should apply and apply it.
        """
        choice = session.choose_match(self)
        self.set_choice(choice)
        session.log_choice(self)

    def reload(self):
        """Reload albums and items from the database.
        """
        for item in self.imported_items():
            item.load()
        self.album.load()

    # Utilities.

    def prune(self, filename):
        """Prune any empty directories above the given file. If this
        task has no `toppath` or the file path provided is not within
        the `toppath`, then this function has no effect. Similarly, if
        the file still exists, no pruning is performed, so it's safe to
        call when the file in question may not have been removed.
        """
        if self.toppath and not os.path.exists(filename):
            util.prune_dirs(os.path.dirname(filename),
                            self.toppath,
                            clutter=config['clutter'].as_str_seq())


class SingletonImportTask(ImportTask):
    """ImportTask for a single track that is not associated to an album.
    """

    def __init__(self, item):
        super(SingletonImportTask, self).__init__(paths=[item.path])
        self.item = item
        self.is_album = False
        self.paths = [item.path]

    def chosen_ident(self):
        assert self.choice_flag in (action.ASIS, action.APPLY)
        if self.choice_flag is action.ASIS:
            return (self.item.artist, self.item.title)
        elif self.choice_flag is action.APPLY:
            return (self.match.info.artist, self.match.info.title)

    def imported_items(self):
        return [self.item]

    def save_progress(self):
        # TODO we should also save progress for singletons
        pass

    def save_history(self):
        # TODO we should also save history for singletons
        pass

    def apply_metadata(self):
        autotag.apply_item_metadata(self.item, self.match.info)

    def _emit_imported(self, lib):
        # FIXME This shouldn't be here. Skipped tasks should be removed from
        # the pipeline.
        if self.skip:
            return
        for item in self.imported_items():
            plugins.send('item_imported', lib=lib, item=item)

    def lookup_candidates(self):
        candidates, recommendation = autotag.tag_item(self.item)
        self.candidates = candidates
        self.rec = recommendation

    def find_duplicates(self, lib):
        """Return a list of items from `lib` that have the same artist
        and title as the task.
        """
        artist, title = self.chosen_ident()

        found_items = []
        query = dbcore.AndQuery((
            dbcore.MatchQuery('artist', artist),
            dbcore.MatchQuery('title', title),
        ))
        for other_item in lib.items(query):
            # The item itself is not considered a duplicate.
            if other_item.path != self.item.path:
                found_items.append(other_item)
        return found_items

    duplicate_items = find_duplicates

    def add(self, lib):
        with lib.transaction():
            self.remove_replaced(lib)
            lib.add(self.item)

    def infer_album_fields(self):
        raise NotImplementedError

    def choose_match(self, session):
        """Ask the session which match should apply and apply it.
        """
        choice = session.choose_item(self)
        self.set_choice(choice)
        session.log_choice(self)

    def reload(self):
        self.item.load()


# FIXME The inheritance relationships are inverted. This is why there
# are so many methods which pass. We should introduce a new
# BaseImportTask class.
class SentinelImportTask(ImportTask):
    """This class marks the progress of an import and does not import
    any items itself.

    If only `toppath` is set the task indicates the end of a top-level
    directory import. If the `paths` argument is given, too, the task
    indicates the progress in the `toppath` import.
    """

    def __init__(self, toppath=None, paths=None):
        self.toppath = toppath
        self.paths = paths
        # TODO Remove the remaining attributes eventually
        self.items = None
        self.remove_duplicates = False
        self.is_album = True
        self.choice_flag = None

    def save_history(self):
        pass

    def save_progress(self):
        if self.paths is None:
            # "Done" sentinel.
            progress_set(self.toppath, None)
        else:
            # "Directory progress" sentinel for singletons
            progress_set(self.toppath, self.paths)

    def skip(self):
        return True

    def set_choice(self, choice):
        raise NotImplementedError

    def cleanup(self, **kwargs):
        pass

    def _emit_imported(self, session):
        pass


class ArchiveImportTask(SentinelImportTask):
    """Additional methods for handling archives.

    Use when `toppath` points to a `zip`, `tar`, or `rar` archive.
    """

    def __init__(self, toppath):
        super(ArchiveImportTask, self).__init__(toppath)
        self.extracted = False

    @classmethod
    def is_archive(cls, path):
        """Returns true if the given path points to an archive that can
        be handled.
        """
        if not os.path.isfile(path):
            return False

        for path_test, _ in cls.handlers():
            if path_test(path):
                return True
        return False

    @classmethod
    def handlers(cls):
        """Returns a list of archive handlers.

        Each handler is a `(path_test, ArchiveClass)` tuple. `path_test`
        is a function that returns `True` if the given path can be
        handled by `ArchiveClass`. `ArchiveClass` is a class that
        implements the same interface as `tarfile.TarFile`.
        """
        if not hasattr(cls, '_handlers'):
            cls._handlers = []
            from zipfile import is_zipfile, ZipFile
            cls._handlers.append((is_zipfile, ZipFile))
            from tarfile import is_tarfile, TarFile
            cls._handlers.append((is_tarfile, TarFile))
            try:
                from rarfile import is_rarfile, RarFile
            except ImportError:
                pass
            else:
                cls._handlers.append((is_rarfile, RarFile))

        return cls._handlers

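    # A sketch of extending the handler list (`is_7zfile`/`SevenZipFile`
    # are hypothetical; any class exposing the `tarfile.TarFile`
    # extraction interface works):
    #
    #     ArchiveImportTask.handlers().append((is_7zfile, SevenZipFile))
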
    def cleanup(self, **kwargs):
        """Removes the temporary directory the archive was extracted to.
        """
        if self.extracted:
            shutil.rmtree(self.toppath)

    def extract(self):
        """Extracts the archive to a temporary directory and sets
        `toppath` to that directory.
        """
        for path_test, handler_class in self.handlers():
            if path_test(self.toppath):
                break

        extract_to = mkdtemp()
        archive = handler_class(self.toppath, mode='r')
        try:
            archive.extractall(extract_to)
        finally:
            # Close the archive handle even when extraction fails.
            archive.close()
        self.extracted = True
        self.toppath = extract_to


# Full-album pipeline stages.

def read_tasks(session):
    """A generator yielding all the albums (as ImportTask objects) found
    in the user-specified list of paths. In the case of a singleton
    import, yields single-item tasks instead.
    """
    # Look for saved incremental directories.
    if session.config['incremental']:
        incremental_skipped = 0
        history_dirs = history_get()

    for toppath in session.paths:
        # Extract archives.
        archive_task = None
        if ArchiveImportTask.is_archive(syspath(toppath)):
            if not (session.config['move'] or session.config['copy']):
                log.warn("Archive importing requires either "
                         "'copy' or 'move' to be enabled.")
                continue

            log.debug('extracting archive {0}'
                      .format(displayable_path(toppath)))
            archive_task = ArchiveImportTask(toppath)
            try:
                archive_task.extract()
            except Exception as exc:
                log.error('extraction failed: {0}'.format(exc))
                continue

            # Continue reading albums from the extracted directory.
            toppath = archive_task.toppath

        # Check whether the path is to a file.
        if not os.path.isdir(syspath(toppath)):
            try:
                item = library.Item.from_path(toppath)
            except mediafile.UnreadableFileError:
                log.warn(u'unreadable file: {0}'.format(
                    util.displayable_path(toppath)
                ))
                continue
            if session.config['singletons']:
                yield SingletonImportTask(item)
            else:
                yield ImportTask(toppath, [toppath], [item])
            continue

        # A flat album import merges all items into one album.
        if session.config['flat'] and not session.config['singletons']:
            all_items = []
            for _, items in autotag.albums_in_dir(toppath):
                all_items += items
            yield ImportTask(toppath, [toppath], all_items)
            yield SentinelImportTask(toppath)
            continue

        resume_dir = None
        if session.want_resume:
            resume_dir = progress_get(toppath)
            if resume_dir:
                # Either accept immediately or prompt for input to decide.
                if session.want_resume is True or \
                   session.should_resume(toppath):
                    log.warn('Resuming interrupted import of %s' % toppath)
                else:
                    # Clear progress; we're starting from the top.
                    resume_dir = None
                    progress_set(toppath, None)

        # Produce paths under this directory.
        for paths, items in autotag.albums_in_dir(toppath):
            # Skip according to progress.
            if session.want_resume and resume_dir:
                # We're fast-forwarding to resume a previous tagging.
                if paths == resume_dir:
                    # We've hit the last good path! Turn off the
                    # fast-forwarding.
                    resume_dir = None
                continue

            # When incremental, skip paths in the history.
            if session.config['incremental'] \
               and tuple(paths) in history_dirs:
                log.debug(u'Skipping previously-imported path: %s' %
                          displayable_path(paths))
                incremental_skipped += 1
                continue

            # Yield all the necessary tasks.
            if session.config['singletons']:
                for item in items:
                    yield SingletonImportTask(item)
                yield SentinelImportTask(toppath, paths)
            else:
                yield ImportTask(toppath, paths, items)

        # Indicate the directory is finished.
        # FIXME hack to delete extracted archives
        if archive_task is None:
            yield SentinelImportTask(toppath)
        else:
            yield archive_task

    # Show skipped directories.
    if session.config['incremental'] and incremental_skipped:
        log.info(u'Incremental import: skipped %i directories.' %
                 incremental_skipped)


def query_tasks(session):
    """A generator that works as a drop-in replacement for read_tasks.
    Instead of finding files from the filesystem, a query is used to
    match items from the library.
    """
    if session.config['singletons']:
        # Search for items.
        for item in session.lib.items(session.query):
            yield SingletonImportTask(item)

    else:
        # Search for albums.
        for album in session.lib.albums(session.query):
            log.debug('yielding album %i: %s - %s' %
                      (album.id, album.albumartist, album.album))
            items = list(album.items())

            # Clear IDs from re-tagged items so they appear "fresh" when
            # we add them back to the library.
            for item in items:
                item.id = None
                item.album_id = None

            yield ImportTask(None, [album.item_dir()], items)


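# For example (a sketch; `SomeImportSession` is a hypothetical subclass),
# passing a query instead of paths re-imports albums already in the
# library, with read_tasks() replaced by query_tasks() in run():
#
#     session = SomeImportSession(lib, None, None, 'albumartist:beatles')
#     session.run()
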
@pipeline.mutator_stage
def lookup_candidates(session, task):
    """A coroutine for performing the initial MusicBrainz lookup for an
    album. It accepts ImportTask objects and stores the candidate
    matches and recommendation on each task.
    """
    if task.skip:
        # FIXME This gets duplicated a lot. We need a better
        # abstraction.
        return

    plugins.send('import_task_start', session=session, task=task)
    log.debug('Looking up: %s' % displayable_path(task.paths))
    task.lookup_candidates()


@pipeline.stage
def user_query(session, task):
    """A coroutine for interfacing with the user about the tagging
    process.

    The coroutine accepts an ImportTask object. It uses the
    session's `choose_match` method to determine the `action` for
    this task. Depending on the action, additional stages are executed
    and the processed task is yielded.

    It emits the ``import_task_choice`` event for plugins. Plugins have
    access to the choice via the ``task.choice_flag`` property and may
    choose to change it.
    """
    if task.skip:
        return task

    # Ask the user for a choice.
    task.choose_match(session)
    plugins.send('import_task_choice', session=session, task=task)

    # As-tracks: transition to singleton workflow.
    if task.choice_flag is action.TRACKS:
        # Set up a little pipeline for dealing with the singletons.
        def emitter(task):
            for item in task.items:
                yield SingletonImportTask(item)
            yield SentinelImportTask(task.toppath, task.paths)

        ipl = pipeline.Pipeline([
            emitter(task),
            lookup_candidates(session),
            user_query(session),
        ])
        task = pipeline.multiple(ipl.pull())

    # As albums: group items by album and create a task for each album.
    elif task.choice_flag is action.ALBUMS:
        ipl = pipeline.Pipeline([
            iter([task]),
            group_albums(session),
            lookup_candidates(session),
            user_query(session)
        ])
        task = pipeline.multiple(ipl.pull())

    return task


def resolve_duplicates(session):
    """Check if a task conflicts with items or albums already imported
    and ask the session to resolve this.

    Two separate stages have to be created for albums and singletons
    since `chosen_ident()` returns different types of data.
    """
    task = None
    recent = set()
    while True:
        task = yield task

        if task.choice_flag in (action.ASIS, action.APPLY):
            ident = task.chosen_ident()
            if ident in recent or task.find_duplicates(session.lib):
                session.resolve_duplicate(task)
                session.log_choice(task, True)
            recent.add(ident)


@pipeline.mutator_stage
def import_asis(session, task):
    """Select the `action.ASIS` choice for all tasks.

    This stage replaces the lookup_candidates and user_query stages
    when the importer is run without autotagging.
    """
    if task.skip:
        return

    log.info(displayable_path(task.paths))

    # Behave as if ASIS were selected.
    task.set_null_candidates()
    task.set_choice(action.ASIS)


@pipeline.mutator_stage
def apply_choices(session, task):
    """A coroutine for applying changes to albums and singletons during
    the autotag process.
    """
    if task.skip:
        return

    # Change metadata.
    if task.apply:
        task.apply_metadata()
        plugins.send('import_task_apply', session=session, task=task)

    # Infer album-level fields.
    if task.is_album:
        task.infer_album_fields()

    task.add(session.lib)


@pipeline.mutator_stage
def plugin_stage(session, func, task):
    """A coroutine (pipeline stage) that calls the given function with
    each non-skipped import task. These stages occur between applying
    metadata changes and moving/copying/writing files.
    """
    if task.skip:
        return

    func(session, task)

    # Stage may modify DB, so re-load cached item data.
    # FIXME Importer plugins should not modify the database but instead
    # the albums and items attached to tasks.
    task.reload()


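# A sketch of a plugin-provided stage (hypothetical function; plugins
# supply these via `BeetsPlugin.import_stages`, collected by
# `plugins.import_stages()` in `ImportSession.run`):
#
#     def announce(session, task):
#         for item in task.imported_items():
#             log.debug('imported %s' % displayable_path(item.path))
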
@pipeline.mutator_stage
def manipulate_files(session, task):
    """A coroutine (pipeline stage) that performs necessary file
    manipulations *after* items have been added to the library.
    """
    if task.skip:
        return

    if task.remove_duplicates:
        task.do_remove_duplicates(session.lib)

    task.manipulate_files(
        move=session.config['move'],
        copy=session.config['copy'],
        write=session.config['write'],
        session=session,
    )


# FIXME Boilerplate. Maybe we should move this to the `manipulate_files`
# stage.
@pipeline.stage
def finalize(session, task):
    task.finalize(session)


def group_albums(session):
    """Group the items of a task by albumartist and album name and create
    a new task for each album. Yield the tasks as a multi message.
    """
    def group(item):
        return (item.albumartist or item.artist, item.album)

    task = None
    while True:
        task = yield task
        if task.skip:
            continue
        tasks = []
        for _, items in itertools.groupby(task.items, group):
            tasks.append(ImportTask(items=list(items)))
        tasks.append(SentinelImportTask(task.toppath, task.paths))

        task = pipeline.multiple(tasks)