From d0bfdce9c5ceb13daa20e9dcca8086531a88dc4e Mon Sep 17 00:00:00 2001 From: Mark McDowall Date: Wed, 16 Feb 2022 18:15:26 -0800 Subject: [PATCH] Fixed: Sorting of some titles with acronyms or common words at the start (cherry picked from commit 79436149eb6869033d2263cd9558dbe75b1d3a68) --- .../SkyHook/SkyHookProxyFixture.cs | 3 +-- .../MovieTests/MovieTitleNormalizerFixture.cs | 23 +++++++++++++------ .../Aggregators/AggregateLanguages.cs | 4 ++-- .../MetadataSource/SkyHook/SkyHookProxy.cs | 2 +- .../Movies/MovieTitleNormalizer.cs | 19 ++++++++------- src/NzbDrone.Core/Parser/Parser.cs | 13 ++++------- src/Radarr.Api.V3/Movies/MovieResource.cs | 3 +-- 7 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/NzbDrone.Core.Test/MetadataSource/SkyHook/SkyHookProxyFixture.cs b/src/NzbDrone.Core.Test/MetadataSource/SkyHook/SkyHookProxyFixture.cs index b8e790a5bc..b0a2676140 100644 --- a/src/NzbDrone.Core.Test/MetadataSource/SkyHook/SkyHookProxyFixture.cs +++ b/src/NzbDrone.Core.Test/MetadataSource/SkyHook/SkyHookProxyFixture.cs @@ -20,6 +20,7 @@ public void Setup() [TestCase(11, "Star Wars")] [TestCase(2, "Ariel")] [TestCase(70981, "Prometheus")] + [TestCase(238, "The Godfather")] public void should_be_able_to_get_movie_detail(int tmdbId, string title) { var details = Subject.GetMovieInfo(tmdbId).Item1; @@ -41,8 +42,6 @@ private void ValidateMovie(MovieMetadata movie) movie.ImdbId.Should().NotBeNullOrWhiteSpace(); movie.Studio.Should().NotBeNullOrWhiteSpace(); movie.Runtime.Should().BeGreaterThan(0); - - // series.TvRageId.Should().BeGreaterThan(0); movie.TmdbId.Should().BeGreaterThan(0); } } diff --git a/src/NzbDrone.Core.Test/MovieTests/MovieTitleNormalizerFixture.cs b/src/NzbDrone.Core.Test/MovieTests/MovieTitleNormalizerFixture.cs index 625a693f3a..233be93183 100644 --- a/src/NzbDrone.Core.Test/MovieTests/MovieTitleNormalizerFixture.cs +++ b/src/NzbDrone.Core.Test/MovieTests/MovieTitleNormalizerFixture.cs @@ -7,14 +7,14 @@ namespace NzbDrone.Core.Test.MovieTests [TestFixture] public class MovieTitleNormalizerFixture { - // TODO: Decide on reimplementing this! - /* - [TestCase("A to Z", 281588, "a to z")] - [TestCase("A. D. - The Trials & Triumph of the Early Church", 266757, "ad trials triumph early church")] - public void should_use_precomputed_title(string title, int tvdbId, string expected) + [TestCase("A to Z", 387354, "a to z")] + [TestCase("A to Z", 1212922, "a to z")] + [TestCase("A to Z: The First Alphabet", 888700, "a to z the first alphabet")] + [TestCase("A to Zeppelin: The Story of Led Zeppelin", 101273, "a to zeppelin the story of led zeppelin")] + public void should_use_precomputed_title(string title, int tmdbId, string expected) { - MovieTitleNormalizer.Normalize(title, tvdbId).Should().Be(expected); - }*/ + MovieTitleNormalizer.Normalize(title, tmdbId).Should().Be(expected); + } [TestCase("2 Broke Girls", "2 broke girls")] [TestCase("Archer (2009)", "archer 2009")] @@ -23,6 +23,15 @@ public void should_use_precomputed_title(string title, int tvdbId, string expect [TestCase("The Good Wife", "good wife")] [TestCase("The Newsroom (2012)", "newsroom 2012")] [TestCase("Special Agent Oso", "special agent oso")] + [TestCase("A.N.T. Farm", "ant farm")] + [TestCase("A.I.C.O. -Incarnation-", "aico incarnation")] + [TestCase("A.D. The Bible Continues", "ad the bible continues")] + [TestCase("A.P. Bio", "ap bio")] + [TestCase("A-Team", "ateam")] + [TestCase("The A-Team", "ateam")] + [TestCase("And Just Like That", "and just like that")] + [TestCase("A.I. Artificial Intelligence", "ai artificial intelligence")] + [TestCase("An A to Z of English", "a to z of english")] public void should_normalize_title(string title, string expected) { MovieTitleNormalizer.Normalize(title, 0).Should().Be(expected); diff --git a/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs b/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs index d4d906d0e7..0ad220c87b 100644 --- a/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs +++ b/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs @@ -29,7 +29,7 @@ public RemoteMovie Aggregate(RemoteMovie remoteMovie) var languages = parsedMovieInfo.Languages; var movie = remoteMovie.Movie; var releaseTokens = parsedMovieInfo.SimpleReleaseTitle ?? parsedMovieInfo.ReleaseTitle; - var normalizedReleaseTokens = Parser.Parser.NormalizeEpisodeTitle(releaseTokens); + var normalizedReleaseTokens = Parser.Parser.NormalizeMovieTitle(releaseTokens); var languagesToRemove = new List(); if (movie == null) @@ -54,7 +54,7 @@ public RemoteMovie Aggregate(RemoteMovie remoteMovie) if (!movieTitleLanguage.Contains(Language.Unknown)) { - var normalizedEpisodeTitle = Parser.Parser.NormalizeEpisodeTitle(movie.Title); + var normalizedEpisodeTitle = Parser.Parser.NormalizeMovieTitle(movie.Title); var movieTitleIndex = normalizedReleaseTokens.IndexOf(normalizedEpisodeTitle, StringComparison.CurrentCultureIgnoreCase); if (movieTitleIndex >= 0) diff --git a/src/NzbDrone.Core/MetadataSource/SkyHook/SkyHookProxy.cs b/src/NzbDrone.Core/MetadataSource/SkyHook/SkyHookProxy.cs index 120c719659..6afb732698 100644 --- a/src/NzbDrone.Core/MetadataSource/SkyHook/SkyHookProxy.cs +++ b/src/NzbDrone.Core/MetadataSource/SkyHook/SkyHookProxy.cs @@ -233,7 +233,7 @@ public MovieMetadata MapMovie(MovieResource resource) movie.Title = resource.Title; movie.OriginalTitle = resource.OriginalTitle; movie.CleanTitle = resource.Title.CleanMovieTitle(); - movie.SortTitle = Parser.Parser.NormalizeTitle(resource.Title); + movie.SortTitle = MovieTitleNormalizer.Normalize(resource.Title, resource.TmdbId); movie.CleanOriginalTitle = resource.OriginalTitle.CleanMovieTitle(); movie.Overview = resource.Overview; diff --git a/src/NzbDrone.Core/Movies/MovieTitleNormalizer.cs b/src/NzbDrone.Core/Movies/MovieTitleNormalizer.cs index 7b43027086..926575b294 100644 --- a/src/NzbDrone.Core/Movies/MovieTitleNormalizer.cs +++ b/src/NzbDrone.Core/Movies/MovieTitleNormalizer.cs @@ -4,16 +4,19 @@ namespace NzbDrone.Core.Movies { public static class MovieTitleNormalizer { - private static readonly Dictionary PreComputedTitles = new Dictionary - { - { 999999999, "a to z" }, - }; - - public static string Normalize(string title, int tmdbid) + private static readonly Dictionary PreComputedTitles = new () { - if (PreComputedTitles.ContainsKey(tmdbid)) + { 387354, "a to z" }, + { 1212922, "a to z" }, + { 888700, "a to z the first alphabet" }, + { 101273, "a to zeppelin the story of led zeppelin" }, + }; + + public static string Normalize(string title, int tmdbId) + { + if (PreComputedTitles.TryGetValue(tmdbId, out var value)) { - return PreComputedTitles[tmdbid]; + return value; } return Parser.Parser.NormalizeTitle(title).ToLower(); diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs index 5de0cdaf5f..3fbc1d5884 100644 --- a/src/NzbDrone.Core/Parser/Parser.cs +++ b/src/NzbDrone.Core/Parser/Parser.cs @@ -161,10 +161,9 @@ public static class Parser // name only...BE VERY CAREFUL WITH THIS, HIGH CHANCE OF FALSE POSITIVES private static readonly Regex ExceptionReleaseGroupRegexExact = new Regex(@"\b(?KRaLiMaRKo|E\.N\.D|D\-Z0N3|Koten_Gars|BluDragon|ZØNEHD|Tigole|HQMUX|VARYG|YIFY|YTS(.(MX|LT|AG))?|TMd|Eml HDTeam|LMain|DarQ|BEN THE MEN)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled); - private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|'|\|)+", RegexOptions.Compiled); private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled); private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled); - private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex ArticleWordRegex = new Regex(@"^(a|an|the)\s", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled); @@ -479,21 +478,19 @@ public static string CleanMovieTitle(this string title) return ReplaceGermanUmlauts(NormalizeRegex.Replace(title, string.Empty).ToLowerInvariant()).RemoveAccent(); } - public static string NormalizeEpisodeTitle(this string title) + public static string NormalizeMovieTitle(string title) { title = SpecialEpisodeWordRegex.Replace(title, string.Empty); title = PunctuationRegex.Replace(title, " "); title = DuplicateSpacesRegex.Replace(title, " "); - return title.Trim() - .ToLower(); + return title.Trim().ToLower(); } - public static string NormalizeTitle(this string title) + public static string NormalizeTitle(string title) { - title = WordDelimiterRegex.Replace(title, " "); title = PunctuationRegex.Replace(title, string.Empty); - title = CommonWordRegex.Replace(title, string.Empty); + title = ArticleWordRegex.Replace(title, string.Empty); title = DuplicateSpacesRegex.Replace(title, " "); title = SpecialCharRegex.Replace(title, string.Empty); diff --git a/src/Radarr.Api.V3/Movies/MovieResource.cs b/src/Radarr.Api.V3/Movies/MovieResource.cs index 66da6b0e8a..9a9829eadd 100644 --- a/src/Radarr.Api.V3/Movies/MovieResource.cs +++ b/src/Radarr.Api.V3/Movies/MovieResource.cs @@ -9,7 +9,6 @@ using NzbDrone.Core.MediaCover; using NzbDrone.Core.Movies; using NzbDrone.Core.Movies.Translations; -using NzbDrone.Core.Parser; using Radarr.Api.V3.MovieFiles; using Radarr.Http.REST; using Swashbuckle.AspNetCore.Annotations; @@ -121,7 +120,7 @@ public static MovieResource ToResource(this Movie model, int availDelay, MovieTr Title = translatedTitle, OriginalTitle = model.MovieMetadata.Value.OriginalTitle, OriginalLanguage = model.MovieMetadata.Value.OriginalLanguage, - SortTitle = translatedTitle.NormalizeTitle(), + SortTitle = MovieTitleNormalizer.Normalize(translatedTitle, model.TmdbId), InCinemas = model.MovieMetadata.Value.InCinemas, PhysicalRelease = model.MovieMetadata.Value.PhysicalRelease, DigitalRelease = model.MovieMetadata.Value.DigitalRelease,