// Patch summary (preamble text before the first "diff --git" header; do not move
// these notes inside a hunk, that would invalidate the hunk line counts).
//
// 1. Adds UrlFixture, an NUnit fixture with two parameterised tests:
//    - should_not_parse_url_in_name: titles decorated with a leading site name
//      (bracketed or bare, with or without "www.", one- or two-part suffix) must
//      yield the same cleaned movie title as the expected value.
//    - should_not_parse_url_in_group: a trailing "[www.site.tld]" must not be
//      returned by ParseReleaseGroup; the expected value is the group before it,
//      or null for the space-separated titles.
// 2. Replaces WebsitePrefixRegex. Old pattern: "[name.tld...]" in brackets, or
//    "www.<name>." followed by one of com|net|org|casa|pics|tc. New pattern:
//    optional "[", optional "www.", a host label of letters/digits/hyphens, then
//    either a two-part suffix, an "xn--" suffix, or any suffix of 2+ letters,
//    terminated by "]" or by at least two space/hyphen characters.
// 3. Replaces WebsitePostfixRegex. New pattern additionally allows digits in the
//    host label and an optional "www."; the suffix is an "xn--" form or 2-6
//    letters. The closing "]" is still required while the opening "[" is optional.
//
// NOTE(review): line breaks and indentation were reconstructed from a collapsed
// copy of this patch. The reconstruction matches the hunk headers (+1,43 and
// -120,11 +120,12), but context-line whitespace should be confirmed against the
// target file before applying.
diff --git a/src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs b/src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs
new file mode 100644
index 0000000000..e99c6a4190
--- /dev/null
+++ b/src/NzbDrone.Core.Test/ParserTests/UrlFixture.cs
@@ -0,0 +1,43 @@
+using FluentAssertions;
+using NUnit.Framework;
+using NzbDrone.Core.Parser;
+using NzbDrone.Core.Test.Framework;
+
+namespace NzbDrone.Core.Test.ParserTests
+{
+    [TestFixture]
+    public class UrlFixture : CoreTest
+    {
+        [TestCase("[www.test.com] - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("test.net - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("[www.test-hyphen.com] - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("www.test123.org - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("[test.co.uk] - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("www.test-hyphen.net.au - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("[www.test123.co.nz] - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("test-hyphen123.org.au - Movie.Title.2023.720p.HDTV.X264-DIMENSION", "Movie Title")]
+        [TestCase("[www.test123.de] - Mad Movie Title 2023 [Bluray720p]", "Mad Movie Title")]
+        [TestCase("www.test-hyphen.de - Mad Movie Title 2023 [Bluray1080p]", "Mad Movie Title")]
+        [TestCase("www.test123.co.za - The Movie Title Bros. (2023)", "The Movie Title Bros.")]
+        [TestCase("[www.test-hyphen.ca] - Movie Title (2023)", "Movie Title")]
+        [TestCase("test123.ca - Movie Time 2023 720p HDTV x264 CRON", "Movie Time")]
+        [TestCase("[www.test-hyphen123.co.za] - Movie Title 2023", "Movie Title")]
+        public void should_not_parse_url_in_name(string postTitle, string title)
+        {
+            var result = Parser.Parser.ParseMovieTitle(postTitle).MovieTitle.CleanMovieTitle();
+            result.Should().Be(title.CleanMovieTitle());
+        }
+
+        [TestCase("Movie.2023.English.HDTV.XviD-LOL[www.abb.com]", "LOL")]
+        [TestCase("Movie Title 2023 English HDTV XviD LOL[www.academy.org]", null)]
+        [TestCase("Movie Title Now 2023 DVDRip XviD RUNNER[www.aetna.net]", null)]
+        [TestCase("Movie.Title.2023.DVDRip.XviD-RUNNER[www.alfaromeo.io]", "RUNNER")]
+        [TestCase("Movie.Title.2023.English.HDTV.XviD-LOL[www.abbott.gov]", "LOL")]
+        [TestCase("Movie Title 2023 English HDTV XviD LOL[www.actor.org]", null)]
+        [TestCase("Movie Title Future 2023 DVDRip XviD RUNNER[www.allstate.net]", null)]
+        public void should_not_parse_url_in_group(string title, string expected)
+        {
+            Parser.Parser.ParseReleaseGroup(title).Should().Be(expected);
+        }
+    }
+}
diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs
index 558b325e53..2de88abf6b 100644
--- a/src/NzbDrone.Core/Parser/Parser.cs
+++ b/src/NzbDrone.Core/Parser/Parser.cs
@@ -120,11 +120,12 @@ public static class Parser
 
         private static readonly Regex SimpleReleaseTitleRegex = new Regex(@"\s*(?:[<>?*:|])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
 
-        private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^\[\s*[-a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z0-9]+\.(?:com|net|org|casa|pics|tc)[ -]*",
+        // Valid TLDs http://data.iana.org/TLD/tlds-alpha-by-domain.txt
+        private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(@"^(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:[a-z]{2,6}\.[a-z]{2,6}|xn--[a-z0-9-]{4,}|[a-z]{2,})\b(?:\s*\]|[ -]{2,})[ -]*",
                                                                 string.Empty,
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);
 
-        private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(@"\[\s*[-a-z]+(\.[a-z0-9]+)+\s*\]$",
+        private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(@"(?:\[\s*)?(?:www\.)?[-a-z0-9-]{1,256}\.(?:xn--[a-z0-9-]{4,}|[a-z]{2,6})\b(?:\s*\])$",
                                                                 string.Empty,
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);
 