Fixed: Sorting of some titles that start with an acronym or a common word

(cherry picked from commit 79436149eb6869033d2263cd9558dbe75b1d3a68)
This commit is contained in:
Mark McDowall 2022-02-16 18:15:26 -08:00 committed by Bogdan
parent 5d0cd78667
commit d0bfdce9c5
7 changed files with 37 additions and 30 deletions

View file

@ -20,6 +20,7 @@ public void Setup()
[TestCase(11, "Star Wars")]
[TestCase(2, "Ariel")]
[TestCase(70981, "Prometheus")]
[TestCase(238, "The Godfather")]
public void should_be_able_to_get_movie_detail(int tmdbId, string title)
{
var details = Subject.GetMovieInfo(tmdbId).Item1;
@ -41,8 +42,6 @@ private void ValidateMovie(MovieMetadata movie)
movie.ImdbId.Should().NotBeNullOrWhiteSpace();
movie.Studio.Should().NotBeNullOrWhiteSpace();
movie.Runtime.Should().BeGreaterThan(0);
// series.TvRageId.Should().BeGreaterThan(0);
movie.TmdbId.Should().BeGreaterThan(0);
}
}

View file

@ -7,14 +7,14 @@ namespace NzbDrone.Core.Test.MovieTests
[TestFixture]
public class MovieTitleNormalizerFixture
{
// TODO: Decide on reimplementing this!
/*
[TestCase("A to Z", 281588, "a to z")]
[TestCase("A. D. - The Trials & Triumph of the Early Church", 266757, "ad trials triumph early church")]
public void should_use_precomputed_title(string title, int tvdbId, string expected)
[TestCase("A to Z", 387354, "a to z")]
[TestCase("A to Z", 1212922, "a to z")]
[TestCase("A to Z: The First Alphabet", 888700, "a to z the first alphabet")]
[TestCase("A to Zeppelin: The Story of Led Zeppelin", 101273, "a to zeppelin the story of led zeppelin")]
public void should_use_precomputed_title(string title, int tmdbId, string expected)
{
MovieTitleNormalizer.Normalize(title, tvdbId).Should().Be(expected);
}*/
MovieTitleNormalizer.Normalize(title, tmdbId).Should().Be(expected);
}
[TestCase("2 Broke Girls", "2 broke girls")]
[TestCase("Archer (2009)", "archer 2009")]
@ -23,6 +23,15 @@ public void should_use_precomputed_title(string title, int tvdbId, string expect
[TestCase("The Good Wife", "good wife")]
[TestCase("The Newsroom (2012)", "newsroom 2012")]
[TestCase("Special Agent Oso", "special agent oso")]
[TestCase("A.N.T. Farm", "ant farm")]
[TestCase("A.I.C.O. -Incarnation-", "aico incarnation")]
[TestCase("A.D. The Bible Continues", "ad the bible continues")]
[TestCase("A.P. Bio", "ap bio")]
[TestCase("A-Team", "ateam")]
[TestCase("The A-Team", "ateam")]
[TestCase("And Just Like That", "and just like that")]
[TestCase("A.I. Artificial Intelligence", "ai artificial intelligence")]
[TestCase("An A to Z of English", "a to z of english")]
public void should_normalize_title(string title, string expected)
{
MovieTitleNormalizer.Normalize(title, 0).Should().Be(expected);

View file

@ -29,7 +29,7 @@ public RemoteMovie Aggregate(RemoteMovie remoteMovie)
var languages = parsedMovieInfo.Languages;
var movie = remoteMovie.Movie;
var releaseTokens = parsedMovieInfo.SimpleReleaseTitle ?? parsedMovieInfo.ReleaseTitle;
var normalizedReleaseTokens = Parser.Parser.NormalizeEpisodeTitle(releaseTokens);
var normalizedReleaseTokens = Parser.Parser.NormalizeMovieTitle(releaseTokens);
var languagesToRemove = new List<Language>();
if (movie == null)
@ -54,7 +54,7 @@ public RemoteMovie Aggregate(RemoteMovie remoteMovie)
if (!movieTitleLanguage.Contains(Language.Unknown))
{
var normalizedEpisodeTitle = Parser.Parser.NormalizeEpisodeTitle(movie.Title);
var normalizedEpisodeTitle = Parser.Parser.NormalizeMovieTitle(movie.Title);
var movieTitleIndex = normalizedReleaseTokens.IndexOf(normalizedEpisodeTitle, StringComparison.CurrentCultureIgnoreCase);
if (movieTitleIndex >= 0)

View file

@ -233,7 +233,7 @@ public MovieMetadata MapMovie(MovieResource resource)
movie.Title = resource.Title;
movie.OriginalTitle = resource.OriginalTitle;
movie.CleanTitle = resource.Title.CleanMovieTitle();
movie.SortTitle = Parser.Parser.NormalizeTitle(resource.Title);
movie.SortTitle = MovieTitleNormalizer.Normalize(resource.Title, resource.TmdbId);
movie.CleanOriginalTitle = resource.OriginalTitle.CleanMovieTitle();
movie.Overview = resource.Overview;

View file

@ -4,16 +4,19 @@ namespace NzbDrone.Core.Movies
{
public static class MovieTitleNormalizer
{
private static readonly Dictionary<int, string> PreComputedTitles = new Dictionary<int, string>
{
{ 999999999, "a to z" },
};
public static string Normalize(string title, int tmdbid)
private static readonly Dictionary<int, string> PreComputedTitles = new ()
{
if (PreComputedTitles.ContainsKey(tmdbid))
{ 387354, "a to z" },
{ 1212922, "a to z" },
{ 888700, "a to z the first alphabet" },
{ 101273, "a to zeppelin the story of led zeppelin" },
};
public static string Normalize(string title, int tmdbId)
{
if (PreComputedTitles.TryGetValue(tmdbId, out var value))
{
return PreComputedTitles[tmdbid];
return value;
}
return Parser.Parser.NormalizeTitle(title).ToLower();

View file

@ -161,10 +161,9 @@ public static class Parser
// name only...BE VERY CAREFUL WITH THIS, HIGH CHANCE OF FALSE POSITIVES
private static readonly Regex ExceptionReleaseGroupRegexExact = new Regex(@"\b(?<releasegroup>KRaLiMaRKo|E\.N\.D|D\-Z0N3|Koten_Gars|BluDragon|ZØNEHD|Tigole|HQMUX|VARYG|YIFY|YTS(.(MX|LT|AG))?|TMd|Eml HDTeam|LMain|DarQ|BEN THE MEN)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|'|\|)+", RegexOptions.Compiled);
private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled);
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ArticleWordRegex = new Regex(@"^(a|an|the)\s", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
@ -479,21 +478,19 @@ public static string CleanMovieTitle(this string title)
return ReplaceGermanUmlauts(NormalizeRegex.Replace(title, string.Empty).ToLowerInvariant()).RemoveAccent();
}
public static string NormalizeEpisodeTitle(this string title)
public static string NormalizeMovieTitle(string title)
{
title = SpecialEpisodeWordRegex.Replace(title, string.Empty);
title = PunctuationRegex.Replace(title, " ");
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim()
.ToLower();
return title.Trim().ToLower();
}
public static string NormalizeTitle(this string title)
public static string NormalizeTitle(string title)
{
title = WordDelimiterRegex.Replace(title, " ");
title = PunctuationRegex.Replace(title, string.Empty);
title = CommonWordRegex.Replace(title, string.Empty);
title = ArticleWordRegex.Replace(title, string.Empty);
title = DuplicateSpacesRegex.Replace(title, " ");
title = SpecialCharRegex.Replace(title, string.Empty);

View file

@ -9,7 +9,6 @@
using NzbDrone.Core.MediaCover;
using NzbDrone.Core.Movies;
using NzbDrone.Core.Movies.Translations;
using NzbDrone.Core.Parser;
using Radarr.Api.V3.MovieFiles;
using Radarr.Http.REST;
using Swashbuckle.AspNetCore.Annotations;
@ -121,7 +120,7 @@ public static MovieResource ToResource(this Movie model, int availDelay, MovieTr
Title = translatedTitle,
OriginalTitle = model.MovieMetadata.Value.OriginalTitle,
OriginalLanguage = model.MovieMetadata.Value.OriginalLanguage,
SortTitle = translatedTitle.NormalizeTitle(),
SortTitle = MovieTitleNormalizer.Normalize(translatedTitle, model.TmdbId),
InCinemas = model.MovieMetadata.Value.InCinemas,
PhysicalRelease = model.MovieMetadata.Value.PhysicalRelease,
DigitalRelease = model.MovieMetadata.Value.DigitalRelease,