align parsing with upstream

Separate release group parsing logic into dedicated classes and update references throughout the codebase.

(cherry picked from commit b00229e53c7a4bcb8684fd0aa4f66650c64a9a20)

Co-Authored-By: Mark McDowall <mark@mcdowall.ca>
This commit is contained in:
bakerboy448 2025-09-21 23:00:15 -05:00
parent f28691e48d
commit 6bdbc9c600
10 changed files with 163 additions and 135 deletions

View file

@ -58,7 +58,7 @@ public class ReleaseGroupParserFixture : CoreTest
[TestCase("Movie Name (2017) (Showtime) (1080p.BD.DD5.1.x265-TheSickle[TAoE])", "TheSickle")] [TestCase("Movie Name (2017) (Showtime) (1080p.BD.DD5.1.x265-TheSickle[TAoE])", "TheSickle")]
public void should_parse_release_group(string title, string expected) public void should_parse_release_group(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
[TestCase("Movie Name (2020) [2160p x265 10bit S82 Joy]", "Joy")] [TestCase("Movie Name (2020) [2160p x265 10bit S82 Joy]", "Joy")]
@ -128,13 +128,13 @@ public void should_parse_release_group(string title, string expected)
[TestCase("Movie Title (2024) (1080p BluRay x265 SDR DDP 5.1 English -BEN THE MEN", "BEN THE MEN")] [TestCase("Movie Title (2024) (1080p BluRay x265 SDR DDP 5.1 English -BEN THE MEN", "BEN THE MEN")]
public void should_parse_exception_release_group(string title, string expected) public void should_parse_exception_release_group(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
[TestCase(@"C:\Test\Doctor.Series.2005.s01e01.internal.bdrip.x264-archivist.mkv", "archivist")] [TestCase(@"C:\Test\Doctor.Series.2005.s01e01.internal.bdrip.x264-archivist.mkv", "archivist")]
public void should_not_include_extension_in_release_group(string title, string expected) public void should_not_include_extension_in_release_group(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
[TestCase("Some.Movie.S02E04.720p.WEBRip.x264-SKGTV English", "SKGTV")] [TestCase("Some.Movie.S02E04.720p.WEBRip.x264-SKGTV English", "SKGTV")]
@ -143,7 +143,7 @@ public void should_not_include_extension_in_release_group(string title, string e
public void should_not_include_language_in_release_group(string title, string expected) public void should_not_include_language_in_release_group(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
[TestCase("Some.Movie.2019.1080p.BDRip.X264.AC3-EVO-RP", "EVO")] [TestCase("Some.Movie.2019.1080p.BDRip.X264.AC3-EVO-RP", "EVO")]
@ -173,7 +173,7 @@ public void should_not_include_language_in_release_group(string title, string ex
public void should_not_include_bad_suffix_in_release_group(string title, string expected) public void should_not_include_bad_suffix_in_release_group(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
[TestCase("[FFF] Invaders of the Movies!! - S01E11 - Someday, With Movies", "FFF")] [TestCase("[FFF] Invaders of the Movies!! - S01E11 - Someday, With Movies", "FFF")]
@ -184,13 +184,13 @@ public void should_not_include_bad_suffix_in_release_group(string title, string
public void should_parse_anime_release_groups(string title, string expected) public void should_parse_anime_release_groups(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
[TestCase("Terrible.Anime.Title.2020.DBOX.480p.x264-iKaos [v3] [6AFFEF6B]")] [TestCase("Terrible.Anime.Title.2020.DBOX.480p.x264-iKaos [v3] [6AFFEF6B]")]
public void should_not_parse_anime_hash_as_release_group(string title) public void should_not_parse_anime_hash_as_release_group(string title)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().BeNull(); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().BeNull();
} }
} }
} }

View file

@ -38,7 +38,7 @@ public void should_not_parse_url_in_name(string postTitle, string title)
[TestCase("Movie Title Future 2023 DVDRip XviD RUNNER[www.allstate.net]", null)] [TestCase("Movie Title Future 2023 DVDRip XviD RUNNER[www.allstate.net]", null)]
public void should_not_parse_url_in_group(string title, string expected) public void should_not_parse_url_in_group(string title, string expected)
{ {
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected); Parser.ReleaseGroupParser.ParseReleaseGroup(title).Should().Be(expected);
} }
} }
} }

View file

@ -12,6 +12,7 @@
using NzbDrone.Common.Extensions; using NzbDrone.Common.Extensions;
using NzbDrone.Common.Serializer; using NzbDrone.Common.Serializer;
using NzbDrone.Core.Datastore.Migration.Framework; using NzbDrone.Core.Datastore.Migration.Framework;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.MediaFiles.MediaInfo; using NzbDrone.Core.MediaFiles.MediaInfo;
namespace NzbDrone.Core.Datastore.Migration namespace NzbDrone.Core.Datastore.Migration
@ -809,7 +810,7 @@ private string MigrateTransferCharacteristics(string transferCharacteristics)
private static string GetSceneNameMatch(string sceneName, params string[] tokens) private static string GetSceneNameMatch(string sceneName, params string[] tokens)
{ {
sceneName = sceneName.IsNotNullOrWhiteSpace() ? Parser.Parser.RemoveFileExtension(sceneName) : string.Empty; sceneName = sceneName.IsNotNullOrWhiteSpace() ? FileExtensions.RemoveFileExtension(sceneName) : string.Empty;
foreach (var token in tokens) foreach (var token in tokens)
{ {

View file

@ -1,11 +1,21 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace NzbDrone.Core.MediaFiles namespace NzbDrone.Core.MediaFiles
{ {
internal static class FileExtensions public static class FileExtensions
{ {
private static List<string> _archiveExtensions = new List<string> private static readonly Regex FileExtensionRegex = new (@"\.[a-z0-9]{2,4}$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly HashSet<string> UsenetExtensions = new HashSet<string>()
{
".par2",
".nzb"
};
public static HashSet<string> ArchiveExtensions => new (StringComparer.OrdinalIgnoreCase)
{ {
".7z", ".7z",
".bz2", ".bz2",
@ -20,8 +30,7 @@ internal static class FileExtensions
".tgz", ".tgz",
".zip" ".zip"
}; };
public static HashSet<string> DangerousExtensions => new (StringComparer.OrdinalIgnoreCase)
private static List<string> _dangerousExtensions = new List<string>
{ {
".arj", ".arj",
".lnk", ".lnk",
@ -31,8 +40,7 @@ internal static class FileExtensions
".vbs", ".vbs",
".zipx" ".zipx"
}; };
public static HashSet<string> ExecutableExtensions => new (StringComparer.OrdinalIgnoreCase)
private static List<string> _executableExtensions = new List<string>
{ {
".bat", ".bat",
".cmd", ".cmd",
@ -40,8 +48,20 @@ internal static class FileExtensions
".sh" ".sh"
}; };
public static HashSet<string> ArchiveExtensions => new HashSet<string>(_archiveExtensions, StringComparer.OrdinalIgnoreCase); public static string RemoveFileExtension(string title)
public static HashSet<string> DangerousExtensions => new HashSet<string>(_dangerousExtensions, StringComparer.OrdinalIgnoreCase); {
public static HashSet<string> ExecutableExtensions => new HashSet<string>(_executableExtensions, StringComparer.OrdinalIgnoreCase); title = FileExtensionRegex.Replace(title, m =>
{
var extension = m.Value.ToLower();
if (MediaFileExtensions.Extensions.Contains(extension) || UsenetExtensions.Contains(extension))
{
return string.Empty;
}
return m.Value;
});
return title;
}
} }
} }

View file

@ -293,7 +293,7 @@ public static string FormatVideoCodec(MediaInfoModel mediaInfo, string sceneName
private static string GetSceneNameMatch(string sceneName, params string[] tokens) private static string GetSceneNameMatch(string sceneName, params string[] tokens)
{ {
sceneName = sceneName.IsNotNullOrWhiteSpace() ? Parser.Parser.RemoveFileExtension(sceneName) : string.Empty; sceneName = sceneName.IsNotNullOrWhiteSpace() ? FileExtensions.RemoveFileExtension(sceneName) : string.Empty;
foreach (var token in tokens) foreach (var token in tokens)
{ {

View file

@ -144,7 +144,7 @@ public ManualImportItem ReprocessItem(string path, string downloadId, int movieI
var downloadClientItem = GetTrackedDownload(downloadId)?.DownloadItem; var downloadClientItem = GetTrackedDownload(downloadId)?.DownloadItem;
var finalReleaseGroup = releaseGroup.IsNullOrWhiteSpace() var finalReleaseGroup = releaseGroup.IsNullOrWhiteSpace()
? Parser.Parser.ParseReleaseGroup(path) ? ReleaseGroupParser.ParseReleaseGroup(path)
: releaseGroup; : releaseGroup;
var finalQuality = (quality?.Quality ?? Quality.Unknown) == Quality.Unknown ? QualityParser.ParseQuality(path) : quality; var finalQuality = (quality?.Quality ?? Quality.Unknown) == Quality.Unknown ? QualityParser.ParseQuality(path) : quality;
var finalLanguages = var finalLanguages =
@ -282,7 +282,7 @@ private ManualImportItem ProcessFile(string rootFolder, string baseFolder, strin
{ {
var localMovie = new LocalMovie(); var localMovie = new LocalMovie();
localMovie.Path = file; localMovie.Path = file;
localMovie.ReleaseGroup = Parser.Parser.ParseReleaseGroup(file); localMovie.ReleaseGroup = ReleaseGroupParser.ParseReleaseGroup(file);
localMovie.Quality = QualityParser.ParseQuality(file); localMovie.Quality = QualityParser.ParseQuality(file);
localMovie.Languages = LanguageParser.ParseLanguages(file); localMovie.Languages = LanguageParser.ParseLanguages(file);
localMovie.Size = _diskProvider.GetFileSize(file); localMovie.Size = _diskProvider.GetFileSize(file);
@ -327,7 +327,7 @@ private List<ManualImportItem> ProcessDownloadDirectory(string rootFolder, List<
localMovie.Path = file; localMovie.Path = file;
localMovie.Quality = new QualityModel(Quality.Unknown); localMovie.Quality = new QualityModel(Quality.Unknown);
localMovie.Languages = new List<Language> { Language.Unknown }; localMovie.Languages = new List<Language> { Language.Unknown };
localMovie.ReleaseGroup = Parser.Parser.ParseReleaseGroup(file); localMovie.ReleaseGroup = ReleaseGroupParser.ParseReleaseGroup(file);
localMovie.Size = _diskProvider.GetFileSize(file); localMovie.Size = _diskProvider.GetFileSize(file);
items.Add(MapItem(new ImportDecision(localMovie), rootFolder, null, null)); items.Add(MapItem(new ImportDecision(localMovie), rootFolder, null, null));

View file

@ -14,7 +14,7 @@ public static string GetSceneName(LocalMovie localMovie)
if (!otherVideoFiles && downloadClientInfo != null) if (!otherVideoFiles && downloadClientInfo != null)
{ {
return Parser.Parser.RemoveFileExtension(downloadClientInfo.ReleaseTitle); return FileExtensions.RemoveFileExtension(downloadClientInfo.ReleaseTitle);
} }
var fileName = Path.GetFileNameWithoutExtension(localMovie.Path.CleanFilePath()); var fileName = Path.GetFileNameWithoutExtension(localMovie.Path.CleanFilePath());

View file

@ -6,6 +6,7 @@
using NLog; using NLog;
using NzbDrone.Common.Extensions; using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation; using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Parser.Model;
namespace NzbDrone.Core.Parser namespace NzbDrone.Core.Parser
@ -21,8 +22,6 @@ public static class Parser
private static readonly Regex HardcodedSubsRegex = new Regex(@"\b((?<hcsub>(\w+(?<!SOFT|MULTI|HORRIBLE)SUBS?))|(?<hc>(HC|SUBBED)))\b", private static readonly Regex HardcodedSubsRegex = new Regex(@"\b((?<hcsub>(\w+(?<!SOFT|MULTI|HORRIBLE)SUBS?))|(?<hc>(HC|SUBBED)))\b",
RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
private static readonly RegexReplace[] PreSubstitutionRegex = Array.Empty<RegexReplace>();
private static readonly Regex[] ReportMovieTitleRegex = new[] private static readonly Regex[] ReportMovieTitleRegex = new[]
{ {
// Anime [Subgroup] and Year // Anime [Subgroup] and Year
@ -110,9 +109,6 @@ public static class Parser
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])([aà](?!$|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])|an|the|and|or|of)(?!$)(?:\b|_))|\W|_", private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])([aà](?!$|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])|an|the|and|or|of)(?!$)(?:\b|_))|\W|_",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ReportImdbId = new Regex(@"(?<imdbid>tt\d{7,8})", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex ReportImdbId = new Regex(@"(?<imdbid>tt\d{7,8})", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ReportTmdbId = new Regex(@"tmdb(id)?-(?<tmdbid>\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex ReportTmdbId = new Regex(@"tmdb(id)?-(?<tmdbid>\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
@ -123,44 +119,13 @@ public static class Parser
private static readonly Regex SimpleReleaseTitleRegex = new Regex(@"\s*(?:[<>?*|])", RegexOptions.Compiled | RegexOptions.IgnoreCase); private static readonly Regex SimpleReleaseTitleRegex = new Regex(@"\s*(?:[<>?*|])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt // Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt
private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^(?:(?:\[|\()\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?<!Naruto-Kun\.)(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*(?:\]|\))|[ -]{2,})[ -]*",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(@"(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:xn--[a-z0-9-]{4,}|[a-z]{2,6})\b(?:\s*\])$",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly RegexReplace CleanReleaseGroupRegex = new RegexReplace(@"(-(RP|1|NZBGeek|Obfuscated|Obfuscation|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet|AlteZachen|RePACKPOST))+$",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly RegexReplace CleanTorrentSuffixRegex = new RegexReplace(@"\[(?:ettv|rartv|rarbg|cttv|publichd)\]$",
string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CleanQualityBracketsRegex = new Regex(@"\[[a-z0-9 ._-]+\]$", private static readonly Regex CleanQualityBracketsRegex = new Regex(@"\[[a-z0-9 ._-]+\]$",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ReleaseGroupRegex = new Regex(@"-(?<releasegroup>[a-z0-9]+(?<part2>-[a-z0-9]+)?(?!.+?(?:480p|576p|720p|1080p|2160p)))(?<!(?:WEB-(DL|Rip)|Blu-Ray|480p|576p|720p|1080p|2160p|DTS-HD|DTS-X|DTS-MA|DTS-ES|-ES|-EN|-CAT|-ENG|-JAP|-GER|-FRA|-FRE|-ITA|-HDRip|\d{1,2}-bit|[ ._]\d{4}-\d{2}|-\d{2}|tmdb(id)?-(?<tmdbid>\d+)|(?<imdbid>tt\d{7,8}))(?:\k<part2>)?)(?:\b|[-._ ]|$)|[-._ ]\[(?<releasegroup>[a-z0-9]+)\]$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex InvalidReleaseGroupRegex = new Regex(@"^([se]\d+|[0-9a-f]{8})$", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex AnimeReleaseGroupRegex = new Regex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_.)?[\(\[]?(?<year>\d{4})[\]\)]?", private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_.)?[\(\[]?(?<year>\d{4})[\]\)]?",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Handle Exception Release Groups that don't follow -RlsGrp; Manual List
// groups whose releases end with RlsGroup) or RlsGroup]
private static readonly Regex ExceptionReleaseGroupRegex = new Regex(@"(?<=[._ \[])(?<releasegroup>(Silence|afm72|Panda|Ghost|MONOLITH|Tigole|Joy|ImE|UTR|t3nzin|Anime Time|Project Angel|Hakata Ramen|HONE|Vyndros|SEV|Garshasp|Kappa|Natty|RCVR|SAMPA|YOGI|r00t|EDGE2020|RZeroX|FreetheFish|Anna|Bandi|Qman|theincognito|HDO|DusIctv|DHD|CtrlHD|-ZR-|ADC|XZVN|RH|Kametsu|Garshasp)(?=\]|\)))", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Handle Exception Release Groups that don't follow -RlsGrp; Manual List
// name only...BE VERY CAREFUL WITH THIS, HIGH CHANCE OF FALSE POSITIVES
private static readonly Regex ExceptionReleaseGroupRegexExact = new Regex(@"\b(?<releasegroup>KRaLiMaRKo|E\.N\.D|D\-Z0N3|Koten_Gars|BluDragon|ZØNEHD|Tigole|HQMUX|VARYG|YIFY|YTS(.(MX|LT|AG))?|TMd|Eml HDTeam|LMain|DarQ|BEN THE MEN)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled); private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled);
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled); private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
private static readonly Regex ArticleWordRegex = new Regex(@"^(a|an|the)\s", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex ArticleWordRegex = new Regex(@"^(a|an|the)\s", RegexOptions.IgnoreCase | RegexOptions.Compiled);
@ -215,7 +180,7 @@ public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
if (ReversedTitleRegex.IsMatch(title)) if (ReversedTitleRegex.IsMatch(title))
{ {
var titleWithoutExtension = RemoveFileExtension(title).ToCharArray(); var titleWithoutExtension = FileExtensions.RemoveFileExtension(title).ToCharArray();
Array.Reverse(titleWithoutExtension); Array.Reverse(titleWithoutExtension);
title = $"{titleWithoutExtension}{title.Substring(titleWithoutExtension.Length)}"; title = $"{titleWithoutExtension}{title.Substring(titleWithoutExtension.Length)}";
@ -223,14 +188,14 @@ public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
Logger.Debug("Reversed name detected. Converted to '{0}'", title); Logger.Debug("Reversed name detected. Converted to '{0}'", title);
} }
var releaseTitle = RemoveFileExtension(title); var releaseTitle = FileExtensions.RemoveFileExtension(title);
// Trim dashes from end // Trim dashes from end
releaseTitle = releaseTitle.Trim('-', '_'); releaseTitle = releaseTitle.Trim('-', '_');
releaseTitle = releaseTitle.Replace("【", "[").Replace("】", "]"); releaseTitle = releaseTitle.Replace("【", "[").Replace("】", "]");
foreach (var replace in PreSubstitutionRegex) foreach (var replace in ParserCommon.PreSubstitutionRegex)
{ {
if (replace.TryReplace(ref releaseTitle)) if (replace.TryReplace(ref releaseTitle))
{ {
@ -242,10 +207,10 @@ public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
var simpleTitle = SimpleTitleRegex.Replace(releaseTitle); var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);
// TODO: Quick fix stripping [url] - prefixes. // TODO: Quick fix stripping [url] - prefixes.
simpleTitle = WebsitePrefixRegex.Replace(simpleTitle); simpleTitle = ParserCommon.WebsitePrefixRegex.Replace(simpleTitle);
simpleTitle = WebsitePostfixRegex.Replace(simpleTitle); simpleTitle = ParserCommon.WebsitePostfixRegex.Replace(simpleTitle);
simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle); simpleTitle = ParserCommon.CleanTorrentSuffixRegex.Replace(simpleTitle);
simpleTitle = CleanQualityBracketsRegex.Replace(simpleTitle, m => simpleTitle = CleanQualityBracketsRegex.Replace(simpleTitle, m =>
{ {
@ -295,7 +260,7 @@ public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
} }
} }
result.ReleaseGroup = ParseReleaseGroup(simpleReleaseTitle); result.ReleaseGroup = ReleaseGroupParser.ParseReleaseGroup(simpleReleaseTitle);
var subGroup = GetSubGroup(match); var subGroup = GetSubGroup(match);
if (!subGroup.IsNullOrWhiteSpace()) if (!subGroup.IsNullOrWhiteSpace())
@ -521,74 +486,6 @@ public static string ParseHardcodeSubs(string title)
return null; return null;
} }
public static string ParseReleaseGroup(string title)
{
title = title.Trim();
title = RemoveFileExtension(title);
title = WebsitePrefixRegex.Replace(title);
title = CleanTorrentSuffixRegex.Replace(title);
var animeMatch = AnimeReleaseGroupRegex.Match(title);
if (animeMatch.Success)
{
return animeMatch.Groups["subgroup"].Value;
}
title = CleanReleaseGroupRegex.Replace(title);
var exceptionReleaseGroupRegex = ExceptionReleaseGroupRegex.Matches(title);
if (exceptionReleaseGroupRegex.Count != 0)
{
return exceptionReleaseGroupRegex.OfType<Match>().Last().Groups["releasegroup"].Value;
}
var exceptionExactMatch = ExceptionReleaseGroupRegexExact.Matches(title);
if (exceptionExactMatch.Count != 0)
{
return exceptionExactMatch.OfType<Match>().Last().Groups["releasegroup"].Value;
}
var matches = ReleaseGroupRegex.Matches(title);
if (matches.Count != 0)
{
var group = matches.OfType<Match>().Last().Groups["releasegroup"].Value;
if (int.TryParse(group, out _))
{
return null;
}
if (InvalidReleaseGroupRegex.IsMatch(group))
{
return null;
}
return group;
}
return null;
}
public static string RemoveFileExtension(string title)
{
title = FileExtensionRegex.Replace(title, m =>
{
var extension = m.Value.ToLower();
if (MediaFiles.MediaFileExtensions.Extensions.Contains(extension) || new[] { ".par2", ".nzb" }.Contains(extension))
{
return string.Empty;
}
return m.Value;
});
return title;
}
public static bool HasMultipleLanguages(string title) public static bool HasMultipleLanguages(string title)
{ {
return MultiRegex.IsMatch(title); return MultiRegex.IsMatch(title);
@ -697,7 +594,7 @@ private static bool ValidateBeforeParsing(string title)
return false; return false;
} }
var titleWithoutExtension = RemoveFileExtension(title); var titleWithoutExtension = FileExtensions.RemoveFileExtension(title);
if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension))) if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
{ {

View file

@ -0,0 +1,23 @@
using System.Text.RegularExpressions;
namespace NzbDrone.Core.Parser;
// These are regular expressions shared between different parser functions;
// they are not intended to be used outside of parsing.
internal static class ParserCommon
{
    // Every shared pattern below is matched case-insensitively and compiled.
    private const RegexOptions SharedOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;

    /// <summary>
    /// Substitutions tried against a title before the rest of parsing runs.
    /// The list is currently empty.
    /// </summary>
    internal static readonly RegexReplace[] PreSubstitutionRegex = System.Array.Empty<RegexReplace>();

    // Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt

    /// <summary>
    /// Removes a website name at the very start of a title, optionally wrapped in
    /// square brackets or parentheses, together with the separator that follows it.
    /// </summary>
    internal static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(
        @"^(?:(?:\[|\()\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?<!Naruto-Kun\.)(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*(?:\]|\))|[ -]{2,})[ -]*",
        string.Empty,
        SharedOptions);

    /// <summary>
    /// Removes a website name at the very end of a title when it is closed by a square bracket.
    /// </summary>
    internal static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(
        @"(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:xn--[a-z0-9-]{4,}|[a-z]{2,6})\b(?:\s*\])$",
        string.Empty,
        SharedOptions);

    /// <summary>
    /// Removes a trailing bracketed tag such as <c>[rarbg]</c> or <c>[ettv]</c> from a title.
    /// </summary>
    internal static readonly RegexReplace CleanTorrentSuffixRegex = new RegexReplace(
        @"\[(?:ettv|rartv|rarbg|cttv|publichd)\]$",
        string.Empty,
        SharedOptions);
}

View file

@ -0,0 +1,87 @@
using System.Linq;
using System.Text.RegularExpressions;
using NzbDrone.Core.MediaFiles;
namespace NzbDrone.Core.Parser;
/// <summary>
/// Extracts the release group name from a release title or file path.
/// </summary>
public static class ReleaseGroupParser
{
    // All patterns in this class are matched case-insensitively and compiled.
    private const RegexOptions PatternOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;

    // General "-Group" suffix (or trailing "[Group]") pattern; the look-behind rejects
    // quality, audio, language and id tokens that merely look like a group name.
    private static readonly Regex ReleaseGroupRegex = new Regex(
        @"-(?<releasegroup>[a-z0-9]+(?<part2>-[a-z0-9]+)?(?!.+?(?:480p|576p|720p|1080p|2160p)))(?<!(?:WEB-(DL|Rip)|Blu-Ray|480p|576p|720p|1080p|2160p|DTS-HD|DTS-X|DTS-MA|DTS-ES|-ES|-EN|-CAT|-ENG|-JAP|-GER|-FRA|-FRE|-ITA|-HDRip|\d{1,2}-bit|[ ._]\d{4}-\d{2}|-\d{2}|tmdb(id)?-(?<tmdbid>\d+)|(?<imdbid>tt\d{7,8}))(?:\k<part2>)?)(?:\b|[-._ ]|$)|[-._ ]\[(?<releasegroup>[a-z0-9]+)\]$",
        PatternOptions);

    // Candidates that are really season/episode markers or 8-digit hex hashes.
    private static readonly Regex InvalidReleaseGroupRegex = new Regex(@"^([se]\d+|[0-9a-f]{8})$", PatternOptions);

    // Leading "[Subgroup]" at the start of the title.
    private static readonly Regex AnimeReleaseGroupRegex = new Regex(
        @"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
        PatternOptions);

    // Handle Exception Release Groups that don't follow -RlsGrp; Manual List
    // name only...be very careful with this last; high chance of false positives
    private static readonly Regex ExceptionReleaseGroupRegexExact = new Regex(
        @"\b(?<releasegroup>KRaLiMaRKo|E\.N\.D|D\-Z0N3|Koten_Gars|BluDragon|ZØNEHD|Tigole|HQMUX|VARYG|YIFY|YTS(.(MX|LT|AG))?|TMd|Eml HDTeam|LMain|DarQ|BEN THE MEN)\b",
        PatternOptions);

    // groups whose releases end with RlsGroup) or RlsGroup]
    private static readonly Regex ExceptionReleaseGroupRegex = new Regex(
        @"(?<=[._ \[])(?<releasegroup>(Silence|afm72|Panda|Ghost|MONOLITH|Tigole|Joy|ImE|UTR|t3nzin|Anime Time|Project Angel|Hakata Ramen|HONE|Vyndros|SEV|Garshasp|Kappa|Natty|RCVR|SAMPA|YOGI|r00t|EDGE2020|RZeroX|FreetheFish|Anna|Bandi|Qman|theincognito|HDO|DusIctv|DHD|CtrlHD|-ZR-|ADC|XZVN|RH|Kametsu)(?=\]|\)))",
        PatternOptions);

    // Trailing "-Suffix" tokens that are stripped before the group is searched for.
    private static readonly RegexReplace CleanReleaseGroupRegex = new RegexReplace(
        @"(-(RP|1|NZBGeek|Obfuscated|Obfuscation|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet|AlteZachen|RePACKPOST))+$",
        string.Empty,
        PatternOptions);

    /// <summary>
    /// Parses the release group out of <paramref name="title"/>.
    /// </summary>
    /// <param name="title">Release title or file path to inspect.</param>
    /// <returns>
    /// The leading bracketed subgroup when the title starts with one; otherwise the last match
    /// of the exception lists or of the general pattern. Returns <c>null</c> when nothing
    /// matches, or when the general match is purely numeric or matches the invalid-group pattern.
    /// </returns>
    public static string ParseReleaseGroup(string title)
    {
        title = FileExtensions.RemoveFileExtension(title.Trim());

        // Only the first substitution that applies is used.
        foreach (var substitution in ParserCommon.PreSubstitutionRegex)
        {
            if (substitution.TryReplace(ref title))
            {
                break;
            }
        }

        title = ParserCommon.WebsitePrefixRegex.Replace(title);
        title = ParserCommon.CleanTorrentSuffixRegex.Replace(title);

        // A leading [subgroup] wins over everything else.
        var leadingSubgroup = AnimeReleaseGroupRegex.Match(title);

        if (leadingSubgroup.Success)
        {
            return leadingSubgroup.Groups["subgroup"].Value;
        }

        title = CleanReleaseGroupRegex.Replace(title);

        // Exception lists are consulted before the general pattern; the exact-name list
        // is only evaluated when the bracket-terminated list found nothing.
        var exceptionGroup = LastReleaseGroup(ExceptionReleaseGroupRegex, title)
                             ?? LastReleaseGroup(ExceptionReleaseGroupRegexExact, title);

        if (exceptionGroup != null)
        {
            return exceptionGroup;
        }

        var candidate = LastReleaseGroup(ReleaseGroupRegex, title);

        if (candidate == null || int.TryParse(candidate, out _) || InvalidReleaseGroupRegex.IsMatch(candidate))
        {
            return null;
        }

        return candidate;
    }

    // Returns the "releasegroup" capture of the last match of pattern in input, or null when there is no match.
    private static string LastReleaseGroup(Regex pattern, string input)
    {
        var found = pattern.Matches(input);

        return found.Count == 0
            ? null
            : found.OfType<Match>().Last().Groups["releasegroup"].Value;
    }
}