New: Multithreaded file scan and artist/album matching for faster import

- ImportDecisionMaker: parallelize tag reading and file augmentation
  (Parallel.ForEach) with degree of parallelism based on processor count.
- IdentificationService: parallelize candidate release scoring in GetBestRelease
  (AsParallel) so multiple album candidates are matched concurrently.

Significantly faster on large libraries (60k+ tracks).

Made-with: Cursor
This commit is contained in:
Sean Parsons 2026-03-12 01:41:00 +00:00
parent f6a3e73705
commit bffe49f2be
2 changed files with 54 additions and 31 deletions

View file

@@ -305,15 +305,18 @@ private void GetBestRelease(LocalAlbumRelease localAlbumRelease, List<CandidateA
_logger.Debug("Matching {0} track files against {1} candidates", localAlbumRelease.TrackCount, candidateReleases.Count);
_logger.Trace("Processing files:\n{0}", string.Join("\n", localAlbumRelease.LocalTracks.Select(x => x.Path)));
var bestDistance = 1.0;
foreach (var candidateRelease in candidateReleases)
var maxParallelism = Math.Max(1, Environment.ProcessorCount);
var scoredCandidates = candidateReleases
.Select((candidateRelease, index) => new { candidateRelease, index })
.AsParallel()
.WithDegreeOfParallelism(maxParallelism)
.Select(item =>
{
var release = candidateRelease.AlbumRelease;
_logger.Debug("Trying Release {0} [{1}, {2} tracks, {3} existing]", release, release.Title, release.TrackCount, candidateRelease.ExistingTracks.Count);
var release = item.candidateRelease.AlbumRelease;
_logger.Debug("Trying Release {0} [{1}, {2} tracks, {3} existing]", release, release.Title, release.TrackCount, item.candidateRelease.ExistingTracks.Count);
var rwatch = System.Diagnostics.Stopwatch.StartNew();
var extraTrackPaths = candidateRelease.ExistingTracks.Select(x => x.Path).ToList();
var extraTrackPaths = new HashSet<string>(item.candidateRelease.ExistingTracks.Select(x => x.Path), PathEqualityComparer.Instance);
var extraTracks = extraTracksOnDisk.Where(x => extraTrackPaths.Contains(x.Path)).ToList();
var allLocalTracks = localAlbumRelease.LocalTracks.Concat(extraTracks).DistinctBy(x => x.Path).ToList();
@@ -322,25 +325,33 @@
var currDistance = distance.NormalizedDistance();
rwatch.Stop();
_logger.Debug("Release {0} [{1} tracks] has distance {2} vs best distance {3} [{4}ms]",
_logger.Debug("Release {0} [{1} tracks] has distance {2} [{3}ms]",
release,
release.TrackCount,
currDistance,
bestDistance,
rwatch.ElapsedMilliseconds);
if (currDistance < bestDistance)
return new
{
bestDistance = currDistance;
localAlbumRelease.Distance = distance;
localAlbumRelease.AlbumRelease = release;
localAlbumRelease.ExistingTracks = extraTracks;
localAlbumRelease.TrackMapping = mapping;
if (currDistance == 0.0)
{
break;
}
}
}
item.index,
release,
distance,
currDistance,
extraTracks,
mapping
};
})
.ToList();
var best = scoredCandidates
.OrderBy(x => x.currDistance)
.ThenBy(x => x.index)
.First();
localAlbumRelease.Distance = best.distance;
localAlbumRelease.AlbumRelease = best.release;
localAlbumRelease.ExistingTracks = best.extraTracks;
localAlbumRelease.TrackMapping = best.mapping;
watch.Stop();
_logger.Debug($"Best release: {localAlbumRelease.AlbumRelease} Distance {localAlbumRelease.Distance.NormalizedDistance()} found in {watch.ElapsedMilliseconds}ms");

View file

@@ -1,7 +1,10 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO.Abstractions;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using NLog;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation.Extensions;
@@ -100,19 +103,25 @@ public Tuple<List<LocalTrack>, List<ImportDecision<LocalTrack>>> GetLocalTracks(
downloadClientItemInfo = Parser.Parser.ParseAlbumTitle(downloadClientItem.Title);
}
var i = 1;
foreach (var file in files)
var processedTracks = new ConcurrentBag<(int Index, LocalTrack Track)>();
var processedDecisions = new ConcurrentBag<(int Index, ImportDecision<LocalTrack> Decision)>();
var progress = 0;
var maxParallelism = Math.Max(1, Environment.ProcessorCount);
var filesWithIndex = files.Select((file, index) => new { file, index }).ToList();
Parallel.ForEach(filesWithIndex, new ParallelOptions { MaxDegreeOfParallelism = maxParallelism }, item =>
{
_logger.ProgressInfo($"Reading file {i++}/{files.Count}");
var current = Interlocked.Increment(ref progress);
_logger.ProgressInfo($"Reading file {current}/{files.Count}");
var localTrack = new LocalTrack
{
DownloadClientAlbumInfo = downloadClientItemInfo,
FolderAlbumInfo = folderInfo,
Path = file.FullName,
Size = file.Length,
Modified = file.LastWriteTimeUtc,
FileTrackInfo = _audioTagService.ReadTags(file.FullName),
Path = item.file.FullName,
Size = item.file.Length,
Modified = item.file.LastWriteTimeUtc,
FileTrackInfo = _audioTagService.ReadTags(item.file.FullName),
AdditionalFile = false
};
@ -120,19 +129,22 @@ public Tuple<List<LocalTrack>, List<ImportDecision<LocalTrack>>> GetLocalTracks(
{
// TODO fix otherfiles?
_augmentingService.Augment(localTrack, true);
localTracks.Add(localTrack);
processedTracks.Add((item.index, localTrack));
}
catch (AugmentingFailedException)
{
decisions.Add(new ImportDecision<LocalTrack>(localTrack, new Rejection("Unable to parse file")));
processedDecisions.Add((item.index, new ImportDecision<LocalTrack>(localTrack, new Rejection("Unable to parse file"))));
}
catch (Exception e)
{
_logger.Error(e, "Couldn't import file. {0}", localTrack.Path);
decisions.Add(new ImportDecision<LocalTrack>(localTrack, new Rejection("Unexpected error processing file")));
processedDecisions.Add((item.index, new ImportDecision<LocalTrack>(localTrack, new Rejection("Unexpected error processing file"))));
}
}
});
localTracks.AddRange(processedTracks.OrderBy(x => x.Index).Select(x => x.Track));
decisions.AddRange(processedDecisions.OrderBy(x => x.Index).Select(x => x.Decision));
_logger.Debug($"Tags parsed for {files.Count} files in {watch.ElapsedMilliseconds}ms");