From 7fda41c18b2eb9f94f9e736f7017d5d80de309e4 Mon Sep 17 00:00:00 2001 From: ta264 Date: Tue, 8 Jun 2021 21:40:56 +0100 Subject: [PATCH] New: Better matching of books with subtitles --- .../SearchDefinitionFixture.cs | 4 +- .../MusicTests/AlbumServiceFixture.cs | 12 ++++ .../ParserTests/ParserFixture.cs | 12 ++++ .../Books/Services/BookService.cs | 3 +- .../Definitions/BookSearchCriteria.cs | 5 +- .../Identification/DistanceCalculator.cs | 12 ++++ src/NzbDrone.Core/Parser/Parser.cs | 57 +++++++++++++++++++ 7 files changed, 101 insertions(+), 4 deletions(-) diff --git a/src/NzbDrone.Core.Test/IndexerSearchTests/SearchDefinitionFixture.cs b/src/NzbDrone.Core.Test/IndexerSearchTests/SearchDefinitionFixture.cs index 43b5be895..38be57af5 100644 --- a/src/NzbDrone.Core.Test/IndexerSearchTests/SearchDefinitionFixture.cs +++ b/src/NzbDrone.Core.Test/IndexerSearchTests/SearchDefinitionFixture.cs @@ -17,13 +17,14 @@ public void should_replace_some_special_characters_author(string author, string } [TestCase("…and Justice for All", "and+Justice+for+All")] - [TestCase("American III: Solitary Man", "American+III+Solitary+Man")] + [TestCase("American III: Solitary Man", "American+III")] [TestCase("Sad Clowns & Hillbillies", "Sad+Clowns+Hillbillies")] [TestCase("¿Quién sabe?", "Quien+sabe")] [TestCase("Seal the Deal & Let’s Boogie", "Seal+the+Deal+Let’s+Boogie")] [TestCase("Section.80", "Section+80")] public void should_replace_some_special_characters(string book, string expected) { + Subject.Author = new Author { Name = "Author" }; Subject.BookTitle = book; Subject.BookQuery.Should().Be(expected); } @@ -31,6 +32,7 @@ public void should_replace_some_special_characters(string book, string expected) [TestCase("+", "+")] public void should_not_replace_some_special_characters_if_result_empty_string(string book, string expected) { + Subject.Author = new Author { Name = "Author" }; Subject.BookTitle = book; Subject.BookQuery.Should().Be(expected); } diff --git 
// Checks SplitBookTitle against representative titles: an "Author:" prefix is stripped,
// a colon separates title from subtitle, a parenthetical containing spaces is treated as
// the subtitle, and a single-word parenthetical such as "(Unabridged)" stays in the title.
[TestCase("Tom Clancy", "Tom Clancy: Ghost Protocol", "Ghost Protocol", "")]
[TestCase("Andrew Steele", "Ageless: The New Science of Getting Older Without Getting Old", "Ageless", "The New Science of Getting Older Without Getting Old")]
[TestCase("Author", "Title (Subtitle with spaces)", "Title", "Subtitle with spaces")]
[TestCase("Author", "Title (Unabridged)", "Title (Unabridged)", "")]
public void should_split_title_correctly(string author, string book, string expectedTitle, string expectedSubtitle)
{
    var split = book.SplitBookTitle(author);

    split.Item1.Should().Be(expectedTitle);
    split.Item2.Should().Be(expectedSubtitle);
}
319566b48..5481bc97a 100644 --- a/src/NzbDrone.Core/Books/Services/BookService.cs +++ b/src/NzbDrone.Core/Books/Services/BookService.cs @@ -110,7 +110,8 @@ private List, string>> BookScoringFunctions(str tc((a, t) => a.CleanTitle.FuzzyMatch(t), title.RemoveAfterDash().CleanAuthorName()), tc((a, t) => a.CleanTitle.FuzzyMatch(t), title.RemoveBracketsAndContents().RemoveAfterDash().CleanAuthorName()), tc((a, t) => t.FuzzyContains(a.CleanTitle), cleanTitle), - tc((a, t) => t.FuzzyContains(a.Title), title) + tc((a, t) => t.FuzzyContains(a.Title), title), + tc((a, t) => a.Title.SplitBookTitle(a.AuthorMetadata.Value.Name).Item1.FuzzyMatch(t), title) }; return scoringFunctions; diff --git a/src/NzbDrone.Core/IndexerSearch/Definitions/BookSearchCriteria.cs b/src/NzbDrone.Core/IndexerSearch/Definitions/BookSearchCriteria.cs index 9d27a95b3..c9570ed37 100644 --- a/src/NzbDrone.Core/IndexerSearch/Definitions/BookSearchCriteria.cs +++ b/src/NzbDrone.Core/IndexerSearch/Definitions/BookSearchCriteria.cs @@ -1,4 +1,5 @@ using NzbDrone.Common.Extensions; +using NzbDrone.Core.Parser; namespace NzbDrone.Core.IndexerSearch.Definitions { @@ -9,11 +10,11 @@ public class BookSearchCriteria : SearchCriteriaBase public string BookIsbn { get; set; } public string Disambiguation { get; set; } - public string BookQuery => GetQueryTitle($"{BookTitle}"); + public string BookQuery => GetQueryTitle(BookTitle.SplitBookTitle(Author.Name).Item1); public override string ToString() { - return $"[{Author.Name} - {BookTitle} ({BookYear})]"; + return $"[{Author.Name} - {BookTitle}]"; } } } diff --git a/src/NzbDrone.Core/MediaFiles/BookImport/Identification/DistanceCalculator.cs b/src/NzbDrone.Core/MediaFiles/BookImport/Identification/DistanceCalculator.cs index 2448dfef5..1e9ce5e23 100644 --- a/src/NzbDrone.Core/MediaFiles/BookImport/Identification/DistanceCalculator.cs +++ b/src/NzbDrone.Core/MediaFiles/BookImport/Identification/DistanceCalculator.cs @@ -49,6 +49,18 @@ public static Distance 
/// <summary>
/// Splits a book title into its main title and its subtitle.
/// </summary>
/// <remarks>
/// A leading "<paramref name="author"/>:" prefix is removed first, e.g. "Tom Clancy: Ghost Protocol".
/// The remainder is split at whichever comes first: a colon, or an opening parenthesis whose
/// bracketed text contains a space. A parenthetical without a space, such as "(Unabridged)",
/// is not treated as a subtitle and stays part of the main title.
/// </remarks>
/// <param name="book">The full book title. A null or empty title yields two empty strings.</param>
/// <param name="author">Author name to strip from the start of the title when followed by a colon.</param>
/// <returns>
/// The trimmed main title and the trimmed subtitle. The subtitle is empty when the title
/// contains no usable separator, in which case the main title is returned unchanged.
/// </returns>
public static (string, string) SplitBookTitle(this string book, string author)
{
    // Guard: the original dereferenced a null title and threw NullReferenceException.
    if (string.IsNullOrEmpty(book))
    {
        return (string.Empty, string.Empty);
    }

    // Strip author from title, eg Tom Clancy: Ghost Protocol.
    // Ordinal comparison: this is an exact prefix test, not a linguistic one.
    if (book.StartsWith($"{author}:", System.StringComparison.Ordinal))
    {
        book = book.Split(':', 2)[1].Trim();
    }

    var parenthesis = book.IndexOf('(');
    var colon = book.IndexOf(':');

    if (parenthesis > -1)
    {
        // Search for the closing bracket AFTER the opening one. Searching from the start of the
        // string could find an earlier ')' and produce a negative Substring length (a crash).
        var endParenthesis = book.IndexOf(')', parenthesis + 1);
        if (endParenthesis > -1)
        {
            var bracketed = book.Substring(parenthesis + 1, endParenthesis - parenthesis - 1);

            // A single-word parenthetical, eg (Unabridged), is not a subtitle.
            if (!bracketed.Contains(' '))
            {
                parenthesis = -1;
            }
        }
    }

    string[] parts = null;

    if (colon > -1 && (parenthesis == -1 || colon < parenthesis))
    {
        // The colon is the first (or only) separator.
        parts = book.Split(':', 2);
    }
    else if (parenthesis > -1)
    {
        // The parenthesis is the first (or only) separator. Always split into exactly two
        // pieces so that text after a later '(' is kept in the subtitle instead of dropped.
        parts = book.Split('(', 2);
        parts[1] = parts[1].TrimEnd(')');
    }

    if (parts != null)
    {
        return (parts[0].Trim(), parts[1].TrimEnd(':').Trim());
    }

    return (book, string.Empty);
}