mirror of
https://github.com/Readarr/Readarr
synced 2025-12-31 04:36:15 +01:00
New: Get more candidates and include ISBN/ASIN in distance calculation
This commit is contained in:
parent
d078dacaab
commit
9f37b1c484
5 changed files with 201 additions and 89 deletions
|
|
@ -1,3 +1,4 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using NLog;
|
||||
|
|
@ -12,7 +13,7 @@ namespace NzbDrone.Core.MediaFiles.BookImport.Identification
|
|||
public interface ICandidateService
|
||||
{
|
||||
List<CandidateEdition> GetDbCandidatesFromTags(LocalEdition localEdition, IdentificationOverrides idOverrides, bool includeExisting);
|
||||
List<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition);
|
||||
IEnumerable<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition);
|
||||
}
|
||||
|
||||
public class CandidateService : ICandidateService
|
||||
|
|
@ -183,116 +184,194 @@ private List<CandidateEdition> GetDbCandidates(LocalEdition localEdition, bool i
|
|||
return candidateReleases;
|
||||
}
|
||||
|
||||
public List<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition)
|
||||
public IEnumerable<CandidateEdition> GetRemoteCandidates(LocalEdition localEdition)
|
||||
{
|
||||
// Gets candidate book releases from the metadata server.
|
||||
// Will eventually need adding locally if we find a match
|
||||
var watch = System.Diagnostics.Stopwatch.StartNew();
|
||||
|
||||
List<Book> remoteBooks = null;
|
||||
var candidates = new List<CandidateEdition>();
|
||||
List<Book> remoteBooks;
|
||||
var seenCandidates = new HashSet<string>();
|
||||
|
||||
var isbns = localEdition.LocalBooks.Select(x => x.FileTrackInfo.Isbn).Distinct().ToList();
|
||||
var asins = localEdition.LocalBooks.Select(x => x.FileTrackInfo.Asin).Distinct().ToList();
|
||||
var goodreads = localEdition.LocalBooks.Select(x => x.FileTrackInfo.GoodreadsId).Distinct().ToList();
|
||||
|
||||
try
|
||||
// grab possibilities for all the IDs present
|
||||
if (isbns.Count == 1 && isbns[0].IsNotNullOrWhiteSpace())
|
||||
{
|
||||
if (isbns.Count == 1 && isbns[0].IsNotNullOrWhiteSpace())
|
||||
{
|
||||
_logger.Trace($"Searching by isbn {isbns[0]}");
|
||||
_logger.Trace($"Searching by isbn {isbns[0]}");
|
||||
|
||||
try
|
||||
{
|
||||
remoteBooks = _bookSearchService.SearchByIsbn(isbns[0]);
|
||||
}
|
||||
|
||||
// Calibre puts junk asins into books it creates so check for sensible length
|
||||
if ((remoteBooks == null || !remoteBooks.Any()) &&
|
||||
asins.Count == 1 &&
|
||||
asins[0].IsNotNullOrWhiteSpace() &&
|
||||
asins[0].Length == 10)
|
||||
catch (GoodreadsException e)
|
||||
{
|
||||
_logger.Trace($"Searching by asin {asins[0]}");
|
||||
_logger.Info(e, "Skipping ISBN search due to Goodreads Error");
|
||||
remoteBooks = new List<Book>();
|
||||
}
|
||||
|
||||
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
|
||||
{
|
||||
yield return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
if (asins.Count == 1 &&
|
||||
asins[0].IsNotNullOrWhiteSpace() &&
|
||||
asins[0].Length == 10)
|
||||
{
|
||||
_logger.Trace($"Searching by asin {asins[0]}");
|
||||
|
||||
try
|
||||
{
|
||||
remoteBooks = _bookSearchService.SearchByAsin(asins[0]);
|
||||
}
|
||||
|
||||
// if we don't have an independent ID, try a goodreads ID, but may have been matched to the wrong edition by calibre
|
||||
if ((remoteBooks == null || !remoteBooks.Any()) &&
|
||||
goodreads.Count == 1 &&
|
||||
goodreads[0].IsNotNullOrWhiteSpace())
|
||||
catch (GoodreadsException e)
|
||||
{
|
||||
if (int.TryParse(goodreads[0], out var id))
|
||||
{
|
||||
_logger.Trace($"Searching by goodreads id {id}");
|
||||
_logger.Info(e, "Skipping ASIN search due to Goodreads Error");
|
||||
remoteBooks = new List<Book>();
|
||||
}
|
||||
|
||||
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
|
||||
{
|
||||
yield return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
if (goodreads.Count == 1 &&
|
||||
goodreads[0].IsNotNullOrWhiteSpace())
|
||||
{
|
||||
if (int.TryParse(goodreads[0], out var id))
|
||||
{
|
||||
_logger.Trace($"Searching by goodreads id {id}");
|
||||
|
||||
try
|
||||
{
|
||||
remoteBooks = _bookSearchService.SearchByGoodreadsId(id);
|
||||
}
|
||||
}
|
||||
catch (GoodreadsException e)
|
||||
{
|
||||
_logger.Info(e, "Skipping Goodreads ID search due to Goodreads Error");
|
||||
remoteBooks = new List<Book>();
|
||||
}
|
||||
|
||||
// if no asin/isbn or no result, fall back to text search
|
||||
if (remoteBooks == null || !remoteBooks.Any())
|
||||
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
|
||||
{
|
||||
yield return candidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we got an id result, stop
|
||||
if (seenCandidates.Any())
|
||||
{
|
||||
yield break;
|
||||
}
|
||||
|
||||
// fall back to author / book name search
|
||||
var authorTags = new List<string>();
|
||||
|
||||
if (TrackGroupingService.IsVariousAuthors(localEdition.LocalBooks))
|
||||
{
|
||||
authorTags.Add("Various Authors");
|
||||
}
|
||||
else
|
||||
{
|
||||
authorTags.AddRange(localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.Authors));
|
||||
}
|
||||
|
||||
var bookTag = localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.BookTitle) ?? "";
|
||||
|
||||
// If no valid author or book tags, stop
|
||||
if (!authorTags.Any() || bookTag.IsNullOrWhiteSpace())
|
||||
{
|
||||
yield break;
|
||||
}
|
||||
|
||||
// Search by author+book
|
||||
foreach (var authorTag in authorTags)
|
||||
{
|
||||
try
|
||||
{
|
||||
// fall back to author / book name search
|
||||
List<string> authorTags = new List<string>();
|
||||
|
||||
if (TrackGroupingService.IsVariousAuthors(localEdition.LocalBooks))
|
||||
{
|
||||
authorTags.Add("Various Authors");
|
||||
}
|
||||
else
|
||||
{
|
||||
authorTags.AddRange(localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.Authors));
|
||||
}
|
||||
|
||||
var bookTag = localEdition.LocalBooks.MostCommon(x => x.FileTrackInfo.BookTitle) ?? "";
|
||||
|
||||
if (!authorTags.Any() || bookTag.IsNullOrWhiteSpace())
|
||||
{
|
||||
return candidates;
|
||||
}
|
||||
|
||||
foreach (var authorTag in authorTags)
|
||||
{
|
||||
remoteBooks = _bookSearchService.SearchForNewBook(bookTag, authorTag);
|
||||
if (remoteBooks.Any())
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!remoteBooks.Any())
|
||||
{
|
||||
var bookSearch = _bookSearchService.SearchForNewBook(bookTag, null);
|
||||
var authorSearch = authorTags.SelectMany(a => _bookSearchService.SearchForNewBook(a, null));
|
||||
|
||||
remoteBooks = bookSearch.Concat(authorSearch).DistinctBy(x => x.ForeignBookId).ToList();
|
||||
}
|
||||
remoteBooks = _bookSearchService.SearchForNewBook(bookTag, authorTag);
|
||||
}
|
||||
catch (GoodreadsException e)
|
||||
{
|
||||
_logger.Info(e, "Skipping author/title search due to Goodreads Error");
|
||||
remoteBooks = new List<Book>();
|
||||
}
|
||||
|
||||
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
|
||||
{
|
||||
yield return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
// If we got an author/book search result, stop
|
||||
if (seenCandidates.Any())
|
||||
{
|
||||
yield break;
|
||||
}
|
||||
|
||||
// Search by just book title
|
||||
try
|
||||
{
|
||||
remoteBooks = _bookSearchService.SearchForNewBook(bookTag, null);
|
||||
}
|
||||
catch (GoodreadsException e)
|
||||
{
|
||||
_logger.Info(e, "Skipping book due to Goodreads error");
|
||||
_logger.Info(e, "Skipping book title search due to Goodreads Error");
|
||||
remoteBooks = new List<Book>();
|
||||
}
|
||||
|
||||
foreach (var book in remoteBooks)
|
||||
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
|
||||
{
|
||||
yield return candidate;
|
||||
}
|
||||
|
||||
// Search by just author
|
||||
foreach (var a in authorTags)
|
||||
{
|
||||
try
|
||||
{
|
||||
remoteBooks = _bookSearchService.SearchForNewBook(a, null);
|
||||
}
|
||||
catch (GoodreadsException e)
|
||||
{
|
||||
_logger.Info(e, "Skipping author search due to Goodreads Error");
|
||||
remoteBooks = new List<Book>();
|
||||
}
|
||||
|
||||
foreach (var candidate in ToCandidates(remoteBooks, seenCandidates))
|
||||
{
|
||||
yield return candidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<CandidateEdition> ToCandidates(IEnumerable<Book> books, HashSet<string> seenCandidates)
|
||||
{
|
||||
var candidates = new List<CandidateEdition>();
|
||||
|
||||
foreach (var book in books)
|
||||
{
|
||||
// We have to make sure various bits and pieces are populated that are normally handled
|
||||
// by a database lazy load
|
||||
foreach (var edition in book.Editions.Value)
|
||||
{
|
||||
edition.Book = book;
|
||||
candidates.Add(new CandidateEdition
|
||||
if (!seenCandidates.Contains(edition.ForeignEditionId))
|
||||
{
|
||||
Edition = edition,
|
||||
ExistingFiles = new List<BookFile>()
|
||||
});
|
||||
seenCandidates.Add(edition.ForeignEditionId);
|
||||
edition.Book = book;
|
||||
candidates.Add(new CandidateEdition
|
||||
{
|
||||
Edition = edition,
|
||||
ExistingFiles = new List<BookFile>()
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
watch.Stop();
|
||||
_logger.Debug($"Getting {candidates.Count} remote candidates from tags for {localEdition.LocalBooks.Count} tracks took {watch.ElapsedMilliseconds}ms");
|
||||
|
||||
return candidates;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ public class Distance
|
|||
{ "source", 2.0 },
|
||||
{ "author", 3.0 },
|
||||
{ "book", 3.0 },
|
||||
{ "isbn", 10.0 },
|
||||
{ "asin", 10.0 },
|
||||
{ "media_count", 1.0 },
|
||||
{ "media_format", 1.0 },
|
||||
{ "year", 1.0 },
|
||||
|
|
|
|||
|
|
@ -64,6 +64,20 @@ public static Distance BookDistance(List<LocalBook> localTracks, Edition edition
|
|||
dist.AddString("book", title, titleOptions);
|
||||
Logger.Trace("book: '{0}' vs '{1}'; {2}", title, titleOptions.ConcatToString("' or '"), dist.NormalizedDistance());
|
||||
|
||||
var isbn = localTracks.MostCommon(x => x.FileTrackInfo.Isbn);
|
||||
if (isbn.IsNotNullOrWhiteSpace() && edition.Isbn13.IsNotNullOrWhiteSpace())
|
||||
{
|
||||
dist.AddBool("isbn", isbn != edition.Isbn13);
|
||||
Logger.Trace("isbn: '{0}' vs '{1}'; {2}", isbn, edition.Isbn13, dist.NormalizedDistance());
|
||||
}
|
||||
|
||||
var asin = localTracks.MostCommon(x => x.FileTrackInfo.Asin);
|
||||
if (asin.IsNotNullOrWhiteSpace() && edition.Asin.IsNotNullOrWhiteSpace())
|
||||
{
|
||||
dist.AddBool("asin", asin != edition.Asin);
|
||||
Logger.Trace("asin: '{0}' vs '{1}'; {2}", asin, edition.Asin, dist.NormalizedDistance());
|
||||
}
|
||||
|
||||
// Year
|
||||
var localYear = localTracks.MostCommon(x => x.FileTrackInfo.Year);
|
||||
if (localYear > 0 && edition.ReleaseDate.HasValue)
|
||||
|
|
|
|||
|
|
@ -116,14 +116,22 @@ private void IdentifyRelease(LocalEdition localBookRelease, IdentificationOverri
|
|||
{
|
||||
var watch = System.Diagnostics.Stopwatch.StartNew();
|
||||
|
||||
var candidateReleases = _candidateService.GetDbCandidatesFromTags(localBookRelease, idOverrides, config.IncludeExisting);
|
||||
IEnumerable<CandidateEdition> candidateReleases = _candidateService.GetDbCandidatesFromTags(localBookRelease, idOverrides, config.IncludeExisting);
|
||||
|
||||
if (candidateReleases.Count == 0 && config.AddNewAuthors)
|
||||
// convert all the TrackFiles that represent extra files to List<LocalTrack>
|
||||
// local candidates are actually a list so this is fine to enumerate
|
||||
var allLocalTracks = ToLocalTrack(candidateReleases
|
||||
.SelectMany(x => x.ExistingFiles)
|
||||
.DistinctBy(x => x.Path), localBookRelease);
|
||||
|
||||
_logger.Debug($"Retrieved {allLocalTracks.Count} possible tracks in {watch.ElapsedMilliseconds}ms");
|
||||
|
||||
if (!candidateReleases.Any() && config.AddNewAuthors)
|
||||
{
|
||||
candidateReleases = _candidateService.GetRemoteCandidates(localBookRelease);
|
||||
}
|
||||
|
||||
if (candidateReleases.Count == 0)
|
||||
if (!candidateReleases.Any())
|
||||
{
|
||||
// can't find any candidates, either locally or (when enabled) from the remote search
|
||||
// populate the overrides and return
|
||||
|
|
@ -137,15 +145,6 @@ private void IdentifyRelease(LocalEdition localBookRelease, IdentificationOverri
|
|||
return;
|
||||
}
|
||||
|
||||
_logger.Debug($"Got {candidateReleases.Count} candidates for {localBookRelease.LocalBooks.Count} tracks in {watch.ElapsedMilliseconds}ms");
|
||||
|
||||
// convert all the TrackFiles that represent extra files to List<LocalTrack>
|
||||
var allLocalTracks = ToLocalTrack(candidateReleases
|
||||
.SelectMany(x => x.ExistingFiles)
|
||||
.DistinctBy(x => x.Path), localBookRelease);
|
||||
|
||||
_logger.Debug($"Retrieved {allLocalTracks.Count} possible tracks in {watch.ElapsedMilliseconds}ms");
|
||||
|
||||
GetBestRelease(localBookRelease, candidateReleases, allLocalTracks);
|
||||
|
||||
_logger.Debug($"Best release found in {watch.ElapsedMilliseconds}ms");
|
||||
|
|
@ -155,11 +154,11 @@ private void IdentifyRelease(LocalEdition localBookRelease, IdentificationOverri
|
|||
_logger.Debug($"IdentifyRelease done in {watch.ElapsedMilliseconds}ms");
|
||||
}
|
||||
|
||||
private void GetBestRelease(LocalEdition localBookRelease, List<CandidateEdition> candidateReleases, List<LocalBook> extraTracksOnDisk)
|
||||
private void GetBestRelease(LocalEdition localBookRelease, IEnumerable<CandidateEdition> candidateReleases, List<LocalBook> extraTracksOnDisk)
|
||||
{
|
||||
var watch = System.Diagnostics.Stopwatch.StartNew();
|
||||
|
||||
_logger.Debug("Matching {0} track files against {1} candidates", localBookRelease.TrackCount, candidateReleases.Count);
|
||||
_logger.Debug("Matching {0} track files against candidates", localBookRelease.TrackCount);
|
||||
_logger.Trace("Processing files:\n{0}", string.Join("\n", localBookRelease.LocalBooks.Select(x => x.Path)));
|
||||
|
||||
double bestDistance = 1.0;
|
||||
|
|
|
|||
|
|
@ -418,12 +418,30 @@ public List<Book> SearchForNewBook(string title, string author)
|
|||
|
||||
public List<Book> SearchByIsbn(string isbn)
|
||||
{
|
||||
return SearchByField("isbn", isbn);
|
||||
var result = SearchByField("isbn", isbn);
|
||||
|
||||
// we don't get isbn back in search result, but if only one result assume the query was correct
|
||||
// and add in the searched isbn
|
||||
if (result.Count == 1 && result[0].Editions.Value.Count == 1)
|
||||
{
|
||||
result[0].Editions.Value[0].Isbn13 = isbn;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Book> SearchByAsin(string asin)
|
||||
{
|
||||
return SearchByField("isbn", asin);
|
||||
var result = SearchByField("asin", asin);
|
||||
|
||||
// we don't get asin back in search result, but if only one result assume the query was correct
|
||||
// and add in the searched asin
|
||||
if (result.Count == 1 && result[0].Editions.Value.Count == 1)
|
||||
{
|
||||
result[0].Editions.Value[0].Asin = asin;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Book> SearchByGoodreadsId(int id)
|
||||
|
|
|
|||
Loading…
Reference in a new issue