diff --git a/.github/workflows/docker-ghcr-multithread.yml b/.github/workflows/docker-ghcr-multithread.yml
new file mode 100644
index 000000000..a7b6dd3fd
--- /dev/null
+++ b/.github/workflows/docker-ghcr-multithread.yml
@@ -0,0 +1,48 @@
+# Build custom Lidarr overlay image and push to GHCR when this branch updates.
+name: Docker (multithread) → GHCR
+
+on:
+  push:
+    branches:
+      - new-multithreaded-import
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  packages: write
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository_owner }}/lidarr
+
+jobs:
+  build-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # Image references must be all-lowercase, but github.repository_owner keeps the
+      # account's original casing; normalize once and reuse the result in the tags.
+      - name: Lowercase image name
+        run: echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >> "$GITHUB_ENV"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile.multithread
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:new-multithreaded-import
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:git-${{ github.sha }}
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 000000000..e96f58bc6 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,98 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + schedule: + - cron: '19 21 * * *' + push: + branches: [ "develop" ] + # Publish semver tags as releases. 
+ tags: [ 'v*.*.*' ] + pull_request: + branches: [ "develop" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as / + IMAGE_NAME: ${{ github.repository }} + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + - name: Install cosign + if: github.event_name != 'pull_request' + uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0 + with: + cosign-release: 'v2.2.4' + + # Set up BuildKit Docker container builder to be able to build + # multi-platform images and export cache + # https://github.com/docker/setup-buildx-action + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + if: github.event_name != 'pull_request' + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 + with: + context: . 
+ push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + # Sign the resulting Docker image digest except on PRs. + # This will only write to the public Rekor transparency log when the Docker + # repository is public to avoid leaking data. If you would like to publish + # transparency data even for private images, pass --force to cosign below. + # https://github.com/sigstore/cosign + - name: Sign the published Docker image + if: ${{ github.event_name != 'pull_request' }} + env: + # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable + TAGS: ${{ steps.meta.outputs.tags }} + DIGEST: ${{ steps.build-and-push.outputs.digest }} + # This step uses the identity token to provision an ephemeral certificate + # against the sigstore community Fulcio instance. + run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} diff --git a/Dockerfile.multithread b/Dockerfile.multithread new file mode 100644 index 000000000..375a5aeaf --- /dev/null +++ b/Dockerfile.multithread @@ -0,0 +1,19 @@ +# CI / context = repo root (lidarr-src). Local builds from parent folder use ../Dockerfile instead. +FROM mcr.microsoft.com/dotnet/sdk:8.0 AS builder +WORKDIR /src + +COPY . 
./ + +RUN dotnet publish src/NzbDrone.Console/Lidarr.Console.csproj \ + -c Release \ + -f net8.0 \ + -r linux-musl-x64 \ + --self-contained true \ + -p:RunAnalyzers=false \ + -p:EnforceCodeStyleInBuild=false \ + -p:TreatWarningsAsErrors=false \ + -o /out + +FROM ghcr.io/linuxserver/lidarr:nightly + +COPY --from=builder /out/ /app/lidarr/bin/ diff --git a/MULTITHREAD_README.md b/MULTITHREAD_README.md new file mode 100644 index 000000000..7796b69f8 --- /dev/null +++ b/MULTITHREAD_README.md @@ -0,0 +1,47 @@ +# Multithreaded library scan / import (this fork) + +This branch adds a faster, **parallel** disk scan and import path. Upstream Lidarr does much of this work sequentially; this fork parallelizes folder scanning, tag reads, and release-candidate scoring. + +A **Dockerfile.multithread** in this repository builds a self-contained binary and overlays it on `ghcr.io/linuxserver/lidarr:nightly` (see CI or build from repo root per that file’s comments). A wrapper layout that keeps this tree in a `lidarr-src/` subdirectory can use the parent `Dockerfile` instead. + +## `LIDARR_MEDIA_IO_PARALLELISM` (optional IO cap) + +Parallel import work is **not** limited by Lidarr’s download bandwidth or rate settings (those apply to indexers/clients only). On **slow or remote storage** (especially NFS), the default **uncapped** parallelism can saturate IOPS. Set this variable only when you need to **limit** concurrency. + +| | | +| --- | --- | +| **Name** | `LIDARR_MEDIA_IO_PARALLELISM` | +| **Omit / empty / invalid / ≤0** | **Original fork behavior:** `Parallel.ForEach` uses the **TPL default** (`MaxDegreeOfParallelism = -1`), which can use **more** concurrent workers than `ProcessorCount` on I/O-heavy work (this is why setting `16` on a 16-core box could feel *slower* than before). **PLINQ** still uses **`ProcessorCount`** (TagLib / candidate scoring cannot use `-1`). | +| **1–64** | Hard cap on **both** `Parallel.ForEach` loops **and** PLINQ degree (same number). 
Use **`1`–`2`** on NFS if the host stalls. | +| **Scope** | Environment is read when each parallel section runs | + +**Docker:** set on the container like any other env variable. + +On the first disk scan, Lidarr logs `Media import parallelism:` with **TPL default (-1, uncapped)** or your numeric cap, plus PLINQ degree and host `ProcessorCount`. + +### Docker Compose + +```yaml +services: + lidarr: + image: your-registry/lidarr-nightly-multithread:latest + environment: + - PUID=1000 + - PGID=1000 + - TZ=Etc/UTC + # Omit LIDARR_MEDIA_IO_PARALLELISM on fast local storage (max throughput). + # - LIDARR_MEDIA_IO_PARALLELISM=2 # NFS / slow disk — cap concurrent work +``` + +### When to set it + +- **Fast local RAID / SSD:** **omit** the variable (matches the first multithread fork). +- **NFS or network filesystem:** start with **`2`** (or **`1`**) if scans overwhelm the host. + +### Implementation reference + +`src/NzbDrone.Common/MediaImportParallelism.cs`. + +## Relationship to upstream + +Behavior outside scan/import parallelism matches your chosen base (e.g. nightly image + overlaid build). For upstream docs and support channels, see [Lidarr](https://github.com/Lidarr/Lidarr) and the [Servarr wiki](https://wiki.servarr.com/lidarr).
diff --git a/src/NzbDrone.Common/MediaImportParallelism.cs b/src/NzbDrone.Common/MediaImportParallelism.cs
new file mode 100644
index 000000000..4efb33174
--- /dev/null
+++ b/src/NzbDrone.Common/MediaImportParallelism.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Globalization;
+using System.Threading.Tasks;
+
+namespace NzbDrone.Common
+{
+    /// <summary>
+    /// Optional cap on parallel scan/import work via <c>LIDARR_MEDIA_IO_PARALLELISM</c>.
+    /// Unrelated to download bandwidth limits in Lidarr settings.
+    /// </summary>
+    /// <remarks>
+    /// Nothing is cached: the environment variable is read again on every call.
+    /// </remarks>
+    public static class MediaImportParallelism
+    {
+        /// <summary>Name of the environment variable that carries the optional cap.</summary>
+        public const string EnvironmentVariableName = "LIDARR_MEDIA_IO_PARALLELISM";
+
+        /// <summary>Largest cap honoured; bigger user values are clamped to this.</summary>
+        private const int MaxDegreeCap = 64;
+
+        /// <summary>
+        /// Options for <c>Parallel.ForEach</c> loops.
+        /// Unset / empty / invalid / ≤0: Original fork behavior — no explicit cap
+        /// (TPL default -1, scheduler chooses concurrency; often higher than core count for I/O).
+        /// 1–64: Cap loops to that many concurrent workers (use on NFS / slow storage).
+        /// </summary>
+        /// <returns>A new <see cref="ParallelOptions"/> instance on every call.</returns>
+        public static ParallelOptions GetParallelForEachOptions()
+        {
+            if (!TryParseUserCap(out var cap))
+            {
+                return new ParallelOptions();
+            }
+
+            return new ParallelOptions { MaxDegreeOfParallelism = cap };
+        }
+
+        /// <summary>
+        /// Degree for PLINQ <c>WithDegreeOfParallelism</c>, which must be ≥ 1.
+        /// Uncapped: <see cref="Environment.ProcessorCount"/> (same as pre-env ImportDecisionMaker / IdentificationService).
+        /// Capped: user value (1–64).
+        /// </summary>
+        public static int PlinqMaxDegreeOfParallelism
+        {
+            get
+            {
+                if (!TryParseUserCap(out var cap))
+                {
+                    return Math.Max(1, Environment.ProcessorCount);
+                }
+
+                return cap;
+            }
+        }
+
+        /// <summary>
+        /// For logging: -1 means TPL default (uncapped loops); otherwise the explicit cap.
+        /// </summary>
+        public static int EffectiveParallelForEachDegreeForLog =>
+            TryParseUserCap(out var cap) ? cap : -1;
+
+        /// <summary>
+        /// Reads <see cref="EnvironmentVariableName"/> and turns it into a usable cap.
+        /// </summary>
+        /// <param name="cap">The parsed value limited to <see cref="MaxDegreeCap"/>; 0 when the method returns <c>false</c>.</param>
+        /// <returns><c>true</c> only when the variable holds an integer greater than zero.</returns>
+        private static bool TryParseUserCap(out int cap)
+        {
+            cap = 0;
+            var raw = Environment.GetEnvironmentVariable(EnvironmentVariableName);
+
+            // Invariant culture: this is machine configuration, so the host locale's
+            // sign conventions must not decide whether the value is accepted.
+            if (string.IsNullOrWhiteSpace(raw) ||
+                !int.TryParse(raw.Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed) ||
+                parsed <= 0)
+            {
+                return false;
+            }
+
+            cap = Math.Min(parsed, MaxDegreeCap);
+            return true;
+        }
+    }
+}
diff --git a/src/NzbDrone.Core/MediaFiles/DiskScanService.cs b/src/NzbDrone.Core/MediaFiles/DiskScanService.cs index a77432497..184f8f635 100644 --- a/src/NzbDrone.Core/MediaFiles/DiskScanService.cs +++ b/src/NzbDrone.Core/MediaFiles/DiskScanService.cs @@ -5,6 +5,8 @@ using System.IO.Abstractions; using System.Linq; using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; using NLog; using NzbDrone.Common; using NzbDrone.Common.Disk; @@ -47,6 +49,7 @@ public class DiskScanService : private readonly IRootFolderService _rootFolderService; private readonly IEventAggregator _eventAggregator; private readonly Logger _logger; + private static int _mediaParallelismLogged; public DiskScanService(IConfigService configService, IDiskProvider diskProvider, @@ -84,23 +87,19 @@ public void Scan(List folders = null, FilterFilesType filter = FilterFil } var mediaFileList = new List(); + var mediaFileListLock = new object(); - var musicFilesStopwatch = Stopwatch.StartNew(); - + // Validate folders first (early exit on error like original behaviour) + var foldersToScan = new List(); foreach (var folder in folders) { - // We could be scanning a root folder or a subset of a root folder. If it's a subset, - // check if the root folder exists before cleaning. 
var rootFolder = _rootFolderService.GetBestRootFolder(folder); - if (rootFolder == null) { _logger.Error("Not scanning {0}, it's not a subdirectory of a defined root folder", folder); return; } - var folderExists = _diskProvider.FolderExists(folder); - if (!folderExists) { if (!_diskProvider.FolderExists(rootFolder.Path)) @@ -110,7 +109,6 @@ public void Scan(List folders = null, FilterFilesType filter = FilterFil skippedArtists.ForEach(x => _eventAggregator.PublishEvent(new ArtistScanSkippedEvent(x, ArtistScanSkippedReason.RootFolderDoesNotExist))); return; } - if (_diskProvider.FolderEmpty(rootFolder.Path)) { _logger.Warn("Artists' root folder ({0}) is empty.", rootFolder.Path); @@ -119,28 +117,49 @@ public void Scan(List folders = null, FilterFilesType filter = FilterFil return; } } - if (!folderExists) { _logger.Debug("Specified scan folder ({0}) doesn't exist.", folder); - CleanMediaFiles(folder, new List()); continue; } + foldersToScan.Add(folder); + } + var musicFilesStopwatch = Stopwatch.StartNew(); + + if (Interlocked.CompareExchange(ref _mediaParallelismLogged, 1, 0) == 0) + { + var envRaw = Environment.GetEnvironmentVariable(MediaImportParallelism.EnvironmentVariableName); + var loopDeg = MediaImportParallelism.EffectiveParallelForEachDegreeForLog; + var loopDesc = loopDeg < 0 ? "TPL default (-1, uncapped)" : loopDeg.ToString(); + _logger.Info( + "Media import parallelism: Parallel.ForEach MaxDegreeOfParallelism={0} ({1}; PLINQ degree {2}). Set {3}=1–64 to cap; omit or ≤0 restores pre-cap fork (uncapped loops). Host ProcessorCount={4}.", + loopDesc, + string.IsNullOrEmpty(envRaw) ? 
$"{MediaImportParallelism.EnvironmentVariableName}=(unset)" : $"{MediaImportParallelism.EnvironmentVariableName}={envRaw}", + MediaImportParallelism.PlinqMaxDegreeOfParallelism, + MediaImportParallelism.EnvironmentVariableName, + Environment.ProcessorCount); + } + + Parallel.ForEach(foldersToScan, MediaImportParallelism.GetParallelForEachOptions(), folder => + { _logger.ProgressInfo("Scanning {0}", folder); - var files = FilterFiles(folder, GetAudioFiles(folder)); + var files = FilterFiles(folder, GetAudioFiles(folder)).ToList(); if (!files.Any()) { _logger.Warn("Scan folder {0} is empty.", folder); - continue; + return; } CleanMediaFiles(folder, files.Select(x => x.FullName).ToList()); - mediaFileList.AddRange(files); - } + lock (mediaFileListLock) + { + mediaFileList.AddRange(files); + } + }); var artists = _artistService.GetArtists(artistIds); diff --git a/src/NzbDrone.Core/MediaFiles/TrackImport/Identification/IdentificationService.cs b/src/NzbDrone.Core/MediaFiles/TrackImport/Identification/IdentificationService.cs index 19f81f152..15f7da24b 100644 --- a/src/NzbDrone.Core/MediaFiles/TrackImport/Identification/IdentificationService.cs +++ b/src/NzbDrone.Core/MediaFiles/TrackImport/Identification/IdentificationService.cs @@ -305,15 +305,17 @@ private void GetBestRelease(LocalAlbumRelease localAlbumRelease, List x.Path))); - var bestDistance = 1.0; - - foreach (var candidateRelease in candidateReleases) + var scoredCandidates = candidateReleases + .Select((candidateRelease, index) => new { candidateRelease, index }) + .AsParallel() + .WithDegreeOfParallelism(MediaImportParallelism.PlinqMaxDegreeOfParallelism) + .Select(item => { - var release = candidateRelease.AlbumRelease; - _logger.Debug("Trying Release {0} [{1}, {2} tracks, {3} existing]", release, release.Title, release.TrackCount, candidateRelease.ExistingTracks.Count); + var release = item.candidateRelease.AlbumRelease; + _logger.Debug("Trying Release {0} [{1}, {2} tracks, {3} existing]", release, 
release.Title, release.TrackCount, item.candidateRelease.ExistingTracks.Count); var rwatch = System.Diagnostics.Stopwatch.StartNew(); - var extraTrackPaths = candidateRelease.ExistingTracks.Select(x => x.Path).ToList(); + var extraTrackPaths = new HashSet(item.candidateRelease.ExistingTracks.Select(x => x.Path), PathEqualityComparer.Instance); var extraTracks = extraTracksOnDisk.Where(x => extraTrackPaths.Contains(x.Path)).ToList(); var allLocalTracks = localAlbumRelease.LocalTracks.Concat(extraTracks).DistinctBy(x => x.Path).ToList(); @@ -322,25 +324,33 @@ private void GetBestRelease(LocalAlbumRelease localAlbumRelease, List x.currDistance) + .ThenBy(x => x.index) + .First(); + + localAlbumRelease.Distance = best.distance; + localAlbumRelease.AlbumRelease = best.release; + localAlbumRelease.ExistingTracks = best.extraTracks; + localAlbumRelease.TrackMapping = best.mapping; watch.Stop(); _logger.Debug($"Best release: {localAlbumRelease.AlbumRelease} Distance {localAlbumRelease.Distance.NormalizedDistance()} found in {watch.ElapsedMilliseconds}ms"); diff --git a/src/NzbDrone.Core/MediaFiles/TrackImport/ImportDecisionMaker.cs b/src/NzbDrone.Core/MediaFiles/TrackImport/ImportDecisionMaker.cs index aae1a9e6f..0c903852e 100644 --- a/src/NzbDrone.Core/MediaFiles/TrackImport/ImportDecisionMaker.cs +++ b/src/NzbDrone.Core/MediaFiles/TrackImport/ImportDecisionMaker.cs @@ -1,8 +1,12 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.IO.Abstractions; using System.Linq; +using System.Threading; +using System.Threading.Tasks; using NLog; +using NzbDrone.Common; using NzbDrone.Common.Extensions; using NzbDrone.Common.Instrumentation.Extensions; using NzbDrone.Core.DecisionEngine; @@ -100,19 +104,24 @@ public Tuple, List>> GetLocalTracks( downloadClientItemInfo = Parser.Parser.ParseAlbumTitle(downloadClientItem.Title); } - var i = 1; - foreach (var file in files) + var processedTracks = new ConcurrentBag<(int Index, LocalTrack 
Track)>(); + var processedDecisions = new ConcurrentBag<(int Index, ImportDecision Decision)>(); + var progress = 0; + var filesWithIndex = files.Select((file, index) => new { file, index }).ToList(); + + Parallel.ForEach(filesWithIndex, MediaImportParallelism.GetParallelForEachOptions(), item => { - _logger.ProgressInfo($"Reading file {i++}/{files.Count}"); + var current = Interlocked.Increment(ref progress); + _logger.ProgressInfo($"Reading file {current}/{files.Count}"); var localTrack = new LocalTrack { DownloadClientAlbumInfo = downloadClientItemInfo, FolderAlbumInfo = folderInfo, - Path = file.FullName, - Size = file.Length, - Modified = file.LastWriteTimeUtc, - FileTrackInfo = _audioTagService.ReadTags(file.FullName), + Path = item.file.FullName, + Size = item.file.Length, + Modified = item.file.LastWriteTimeUtc, + FileTrackInfo = _audioTagService.ReadTags(item.file.FullName), AdditionalFile = false }; @@ -120,19 +129,22 @@ public Tuple, List>> GetLocalTracks( { // TODO fix otherfiles? _augmentingService.Augment(localTrack, true); - localTracks.Add(localTrack); + processedTracks.Add((item.index, localTrack)); } catch (AugmentingFailedException) { - decisions.Add(new ImportDecision(localTrack, new Rejection("Unable to parse file"))); + processedDecisions.Add((item.index, new ImportDecision(localTrack, new Rejection("Unable to parse file")))); } catch (Exception e) { _logger.Error(e, "Couldn't import file. 
{0}", localTrack.Path); - decisions.Add(new ImportDecision(localTrack, new Rejection("Unexpected error processing file"))); + processedDecisions.Add((item.index, new ImportDecision(localTrack, new Rejection("Unexpected error processing file")))); } - } + }); + + localTracks.AddRange(processedTracks.OrderBy(x => x.Index).Select(x => x.Track)); + decisions.AddRange(processedDecisions.OrderBy(x => x.Index).Select(x => x.Decision)); _logger.Debug($"Tags parsed for {files.Count} files in {watch.ElapsedMilliseconds}ms"); diff --git a/src/NzbDrone.Core/Music/Services/RefreshArtistService.cs b/src/NzbDrone.Core/Music/Services/RefreshArtistService.cs index 4d4efdf6e..fe1734da2 100644 --- a/src/NzbDrone.Core/Music/Services/RefreshArtistService.cs +++ b/src/NzbDrone.Core/Music/Services/RefreshArtistService.cs @@ -305,6 +305,13 @@ private void RescanArtists(List artists, bool isNew, CommandTrigger trig // badly organized / partly matched libraries folders = artists.Select(x => x.Path).ToList(); } + else if (trigger == CommandTrigger.Manual && artists.Any()) + { + // Manual refresh of specific artist(s): only scan those artists' folders, + // never the entire library (avoids 60k+ file scan when refreshing e.g. Various Artists). + folders = artists.Select(x => x.Path).ToList(); + _logger.Trace("Manual refresh: rescanning only {0} artist folder(s)", folders.Count); + } else if (rescanAfterRefresh == RescanAfterRefreshType.Never) { _logger.Trace("Skipping rescan. Reason: never rescan after refresh");