From 281547d39d1e922212d8349a4e3a9a9c618242d8 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Sat, 4 Jun 2022 22:42:38 +0300 Subject: [PATCH 01/11] Created lostfilm indexer and prepared UserPassCaptcha base settings --- .../Indexers/Definitions/Lostfilm.cs | 381 ++++++++++++++++++ .../UserPassCaptchaTorrentBaseSettings.cs | 46 +++ .../Settings/UserPassTorrentBaseSettings.cs | 14 +- 3 files changed, 436 insertions(+), 5 deletions(-) create mode 100644 src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs create mode 100644 src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs new file mode 100644 index 000000000..d3b364f41 --- /dev/null +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -0,0 +1,381 @@ +using System; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using AngleSharp.Html.Parser; +using FluentValidation; +using NLog; +using NzbDrone.Common.Http; +using NzbDrone.Core.Annotations; +using NzbDrone.Core.Configuration; +using NzbDrone.Core.Indexers.Exceptions; +using NzbDrone.Core.Indexers.Settings; +using NzbDrone.Core.IndexerSearch.Definitions; +using NzbDrone.Core.Messaging.Events; +using NzbDrone.Core.Parser; +using NzbDrone.Core.Parser.Model; +using NzbDrone.Core.Validation; + +namespace NzbDrone.Core.Indexers.Definitions +{ + public class Lostfilm : TorrentIndexerBase + { + public override string Name => "Lostfilm"; + public override string[] IndexerUrls => new string[] + { + "https://www.lostfilm.tv/", + "https://www.lostfilm.run/", + "https://www.lostfilmtv.site/", + "https://www.lostfilm.tv/", + "https://www.lostfilm.win/", + "https://www.lostfilm.tw/", + "https://www.lostfilmtv2.site/", + "https://www.lostfilm.uno/" + }; + public override string Description => "Lostfilm is russian tv 
shows voiceover group publishing their releases."; + public override string Language => "ru-RU"; + public override Encoding Encoding => Encoding.UTF8; + public override DownloadProtocol Protocol => DownloadProtocol.Torrent; + public override IndexerPrivacy Privacy => IndexerPrivacy.SemiPrivate; + public override IndexerCapabilities Capabilities => SetCapabilities(); + + public Lostfilm(IIndexerHttpClient httpClient, IEventAggregator eventAggregator, IIndexerStatusService indexerStatusService, IConfigService configService, Logger logger) + : base(httpClient, eventAggregator, indexerStatusService, configService, logger) + { + } + + public override IIndexerRequestGenerator GetRequestGenerator() + { + return new LostfilmRequestGenerator() { Settings = Settings, Capabilities = Capabilities }; + } + + public override IParseIndexerResponse GetParser() + { + return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger }; + } + + // protected override async Task DoLogin() + // { + // UpdateCookies(null, null); + + // var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "index.php") + // { + // LogResponseContent = true, + // AllowAutoRedirect = true + // }; + + // var mainPage = await ExecuteAuth(new HttpRequest(Settings.BaseUrl)); + + // requestBuilder.Method = HttpMethod.Post; + // requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); + // requestBuilder.SetCookies(mainPage.GetCookies()); + + // var authLoginRequest = requestBuilder + // .AddFormParameter("login_name", Settings.Username) + // .AddFormParameter("login_password", Settings.Password) + // .AddFormParameter("login", "submit") + // .SetHeader("Content-Type", "application/x-www-form-urlencoded") + // .Build(); + + // var response = await ExecuteAuth(authLoginRequest); + + // if (response.Content != null && !CheckIfLoginNeeded(response)) + // { + // UpdateCookies(response.GetCookies(), DateTime.Now + TimeSpan.FromDays(30)); + // 
_logger.Debug("Anidub authentication succeeded"); + // } + // else + // { + // const string ErrorSelector = "#content .berror .berror_c"; + // var parser = new HtmlParser(); + // var document = await parser.ParseDocumentAsync(response.Content); + // var errorMessage = document.QuerySelector(ErrorSelector).TextContent.Trim(); + // throw new IndexerAuthException("Anidub authentication failed. Error: " + errorMessage); + // } + // } + + // protected override bool CheckIfLoginNeeded(HttpResponse httpResponse) + // { + // if (httpResponse.Content.Contains("index.php?action=logout")) + // { + // return false; + // } + + // return true; + // } + private IndexerCapabilities SetCapabilities() + { + var caps = new IndexerCapabilities + { + TvSearchParams = new List + { + TvSearchParam.Q, TvSearchParam.Season, TvSearchParam.Ep + } + }; + caps.Categories.AddCategoryMapping(1, NewznabStandardCategory.TV, "TV Shows"); + return caps; + } + } + + public class LostfilmRequestGenerator : IIndexerRequestGenerator + { + public UserPassCaptchaTorrentBaseSettings Settings { get; set; } + public IndexerCapabilities Capabilities { get; set; } + + public LostfilmRequestGenerator() + { + } + + private IEnumerable GetPagedRequests(string term, int[] categories) + { + var requestUrl = string.Empty; + + if (string.IsNullOrWhiteSpace(term)) + { + requestUrl = Settings.BaseUrl; + } + else + { + var queryCollection = new NameValueCollection + { + // Remove season and episode info from search term cause it breaks search + { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() }, + { "limit", "20" }, + { "orderby_sort", "entry_date|desc" } + }; + + requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString()); + } + + var request = new IndexerRequest(requestUrl, HttpAccept.Html); + yield return request; + } + + public IndexerPageableRequestChain GetSearchRequests(MovieSearchCriteria searchCriteria) + { + var 
pageableRequests = new IndexerPageableRequestChain(); + + pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); + + return pageableRequests; + } + + public IndexerPageableRequestChain GetSearchRequests(TvSearchCriteria searchCriteria) + { + var pageableRequests = new IndexerPageableRequestChain(); + + pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedTvSearchString), searchCriteria.Categories)); + + return pageableRequests; + } + + public IndexerPageableRequestChain GetSearchRequests(BasicSearchCriteria searchCriteria) + { + var pageableRequests = new IndexerPageableRequestChain(); + + pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); + + return pageableRequests; + } + + // Lostfilm doesn't support music, but this function required by interface + public IndexerPageableRequestChain GetSearchRequests(MusicSearchCriteria searchCriteria) + { + return new IndexerPageableRequestChain(); + } + + // Lostfilm doesn't support books, but this function required by interface + public IndexerPageableRequestChain GetSearchRequests(BookSearchCriteria searchCriteria) + { + return new IndexerPageableRequestChain(); + } + + public Func> GetCookies { get; set; } + public Action, DateTime?> CookiesUpdater { get; set; } + } + + public class LostfilmParser : IParseIndexerResponse + { + private readonly UserPassCaptchaTorrentBaseSettings _settings; + private readonly IndexerCapabilitiesCategories _categories; + private static readonly Regex EpisodesInfoQueryRegex = new Regex(@"сери[ия] (\d+)(?:-(\d+))? 
из.*", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ResolutionInfoQueryRegex = new Regex(@"качество (\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex SizeInfoQueryRegex = new Regex(@"размер:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ReleaseDateInfoQueryRegex = new Regex(@"добавлен:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex CategorieMovieRegex = new Regex(@"Фильм", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex CategorieOVARegex = new Regex(@"ОВА|OVA|ОНА|ONA|Special", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex CategorieDoramaRegex = new Regex(@"Дорама", RegexOptions.Compiled | RegexOptions.IgnoreCase); + public IIndexerHttpClient HttpClient { get; set; } + public Logger Logger { get; set; } + + public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabilitiesCategories categories) + { + _settings = settings; + _categories = categories; + } + + private string composeTitle(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr) + { + var name_ru = dom.QuerySelector("div.media__post__header > h1").TextContent.Trim(); + var name_en = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(1) > div > span").TextContent.Trim(); + var name_orig = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(2) > div > span").TextContent.Trim(); + + var title = name_ru + " / " + name_en; + if (name_en != name_orig) + { + title += " / " + name_orig; + } + + var tabName = t.TextContent; + tabName = tabName.Replace("Сезон", "Season"); + if (tabName.Contains("Серии")) + { + tabName = ""; + } + + var heading = tr.QuerySelector("h3.tracker_info_bold").TextContent; + + // Parse episodes info from heading if episods info present + var 
match = EpisodesInfoQueryRegex.Match(heading); + heading = tabName; + if (match.Success) + { + if (string.IsNullOrEmpty(match.Groups[2].Value)) + { + heading += " E" + match.Groups[1].Value; + } + else + { + heading += string.Format(" E{0}-{1}", match.Groups[1].Value, match.Groups[2].Value); + } + } + + return title + " - " + heading + " [" + getResolution(tr) + "p]"; + } + + private string getResolution(AngleSharp.Dom.IElement tr) + { + var resolution = tr.QuerySelector("div.tracker_info_left").TextContent; + return ResolutionInfoQueryRegex.Match(resolution).Groups[1].Value; + } + + private long getReleaseSize(AngleSharp.Dom.IElement tr) + { + var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent; + return ParseUtil.GetBytes(SizeInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim()); + } + + private DateTime getReleaseDate(AngleSharp.Dom.IElement tr) + { + var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent; + return DateTime.Parse(ReleaseDateInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim()); + } + + private ICollection MapCategories(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr) + { + var rName = t.TextContent; + var rDesc = tr.QuerySelector("h3.tracker_info_bold").TextContent; + var type = dom.QuerySelector("div.releases-date:contains('Тип:')").TextContent; + + // Check OVA first cause OVA looks like anime with OVA in release name or description + if (CategorieOVARegex.IsMatch(rName) || CategorieOVARegex.IsMatch(rDesc)) + { + return _categories.MapTrackerCatDescToNewznab("OVA/ONA/Special"); + } + + // Check movies then, cause some of releases could be movies dorama and should go to movies category + if (CategorieMovieRegex.IsMatch(rName) || CategorieMovieRegex.IsMatch(rDesc)) + { + return _categories.MapTrackerCatDescToNewznab("Movies"); + } + + // Check dorama. 
Most of doramas are flaged as doramas in type info, but type info could have a lot of types at same time (movie, etc) + if (CategorieDoramaRegex.IsMatch(rName) || CategorieDoramaRegex.IsMatch(type)) + { + return _categories.MapTrackerCatDescToNewznab("Dorama"); + } + + return _categories.MapTrackerCatDescToNewznab("TV Anime"); + } + + private IList ParseRelease(IndexerResponse indexerResponse) + { + var torrentInfos = new List(); + var parser = new HtmlParser(); + var dom = parser.ParseDocument(indexerResponse.Content); + + foreach (var t in dom.QuerySelectorAll("ul.media__tabs__nav > li > a")) + { + var tr_id = t.Attributes["href"].Value; + var tr = dom.QuerySelector("div" + tr_id); + var seeders = int.Parse(tr.QuerySelector("div.circle_green_text_top").TextContent); + var url = indexerResponse.HttpRequest.Url.ToString(); + + var release = new TorrentInfo + { + Title = composeTitle(dom, t, tr), + InfoUrl = url, + DownloadVolumeFactor = 0, + UploadVolumeFactor = 1, + + Guid = url + tr_id, + Seeders = seeders, + Peers = seeders + int.Parse(tr.QuerySelector("div.circle_red_text_top").TextContent), + Grabs = int.Parse(tr.QuerySelector("div.circle_grey_text_top").TextContent), + Categories = MapCategories(dom, t, tr), + PublishDate = getReleaseDate(tr), + DownloadUrl = tr.QuerySelector("div.download_tracker > a.btn__green").Attributes["href"].Value, + MagnetUrl = tr.QuerySelector("div.download_tracker > a.btn__d-gray").Attributes["href"].Value, + Size = getReleaseSize(tr), + Resolution = getResolution(tr) + }; + torrentInfos.Add(release); + } + + return torrentInfos; + } + + public IList ParseResponse(IndexerResponse indexerResponse) + { + var torrentInfos = new List(); + + var parser = new HtmlParser(); + var dom = parser.ParseDocument(indexerResponse.Content); + var links = dom.QuerySelectorAll("a.ads-list__item__title"); + foreach (var link in links) + { + var url = link.GetAttribute("href"); + + // Some URLs in search are broken + if (url.StartsWith("//")) + { + 
url = "https:" + url; + } + + var releaseRequest = new IndexerRequest(url, HttpAccept.Html); + var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.Execute(releaseRequest.HttpRequest)); + + // Throw common http errors here before we try to parse + if (releaseResponse.HttpResponse.HasHttpError) + { + if ((int)releaseResponse.HttpResponse.StatusCode == 429) + { + throw new TooManyRequestsException(releaseRequest.HttpRequest, releaseResponse.HttpResponse); + } + else + { + throw new IndexerException(releaseResponse, "Http error code: " + releaseResponse.HttpResponse.StatusCode); + } + } + + torrentInfos.AddRange(ParseRelease(releaseResponse)); + } + + return torrentInfos.ToArray(); + } + + public Action, DateTime?> CookiesUpdater { get; set; } + } +} diff --git a/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs b/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs new file mode 100644 index 000000000..7b03f66ee --- /dev/null +++ b/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs @@ -0,0 +1,46 @@ +using FluentValidation; +using NzbDrone.Core.Annotations; +using NzbDrone.Core.Validation; + +namespace NzbDrone.Core.Indexers.Settings +{ + public class UserPassTorrentBaseSettings : ITorrentIndexerSettings + { + public class UserPassBaseSettingsValidator : AbstractValidator + { + public UserPassBaseSettingsValidator() + { + RuleFor(c => c.Username).NotEmpty(); + RuleFor(c => c.Password).NotEmpty(); + } + } + + private static readonly UserPassBaseSettingsValidator Validator = new UserPassBaseSettingsValidator(); + + public UserPassTorrentBaseSettings() + { + Username = ""; + Password = ""; + } + + [FieldDefinition(1, Label = "Base Url", HelpText = "Select which baseurl Prowlarr will use for requests to the site", Type = FieldType.Select, SelectOptionsProviderAction = "getUrls")] + public string BaseUrl { get; set; } + + [FieldDefinition(2, Label = "Username", HelpText = "Site Username", 
Privacy = PrivacyLevel.UserName)] + public string Username { get; set; } + + [FieldDefinition(3, Label = "Password", HelpText = "Site Password", Privacy = PrivacyLevel.Password, Type = FieldType.Password)] + public string Password { get; set; } + + [FieldDefinition(4)] + public IndexerBaseSettings BaseSettings { get; set; } = new IndexerBaseSettings(); + + [FieldDefinition(5)] + public IndexerTorrentBaseSettings TorrentBaseSettings { get; set; } = new IndexerTorrentBaseSettings(); + + public NzbDroneValidationResult Validate() + { + return new NzbDroneValidationResult(Validator.Validate(this)); + } + } +} diff --git a/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs b/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs index 7b03f66ee..4761bdeac 100644 --- a/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs +++ b/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs @@ -4,9 +4,9 @@ namespace NzbDrone.Core.Indexers.Settings { - public class UserPassTorrentBaseSettings : ITorrentIndexerSettings + public class UserPassCaptchaTorrentBaseSettings : ITorrentIndexerSettings { - public class UserPassBaseSettingsValidator : AbstractValidator + public class UserPassBaseSettingsValidator : AbstractValidator { public UserPassBaseSettingsValidator() { @@ -17,10 +17,11 @@ public UserPassBaseSettingsValidator() private static readonly UserPassBaseSettingsValidator Validator = new UserPassBaseSettingsValidator(); - public UserPassTorrentBaseSettings() + public UserPassCaptchaTorrentBaseSettings() { Username = ""; Password = ""; + Captcha = ""; } [FieldDefinition(1, Label = "Base Url", HelpText = "Select which baseurl Prowlarr will use for requests to the site", Type = FieldType.Select, SelectOptionsProviderAction = "getUrls")] @@ -32,10 +33,13 @@ public UserPassTorrentBaseSettings() [FieldDefinition(3, Label = "Password", HelpText = "Site Password", Privacy = PrivacyLevel.Password, Type = FieldType.Password)] public 
string Password { get; set; } - [FieldDefinition(4)] - public IndexerBaseSettings BaseSettings { get; set; } = new IndexerBaseSettings(); + [FieldDefinition(4, Label = "Captcha", HelpText = "Site Captcha", Privacy = PrivacyLevel.Normal, Type = FieldType.Captcha)] + public string Captcha { get; set; } [FieldDefinition(5)] + public IndexerBaseSettings BaseSettings { get; set; } = new IndexerBaseSettings(); + + [FieldDefinition(6)] public IndexerTorrentBaseSettings TorrentBaseSettings { get; set; } = new IndexerTorrentBaseSettings(); public NzbDroneValidationResult Validate() From bef802eda0d954f49cff8866c967fc2fb4109561 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Sun, 5 Jun 2022 00:04:05 +0300 Subject: [PATCH 02/11] WIP --- .../Indexers/Definitions/Lostfilm.cs | 26 ++++++++++++++++--- .../UserPassCaptchaTorrentBaseSettings.cs | 22 +++++++++++----- .../Settings/UserPassTorrentBaseSettings.cs | 14 ++++------ 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index d3b364f41..87216ba9f 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -56,10 +56,30 @@ public override IParseIndexerResponse GetParser() return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger }; } - // protected override async Task DoLogin() - // { - // UpdateCookies(null, null); + protected override async Task DoLogin() + { + UpdateCookies(null, null); + var loginPage = await ExecuteAuth(new HttpRequest(Settings.BaseUrl + "login")); + // UpdateCookies(loginPage.Cookies); + var parser = new HtmlParser(); + var document = parser.ParseDocument(loginPage.Content); + var qCaptchaImg = document.QuerySelector("img#captcha_pictcha"); + if (qCaptchaImg != null) + { + var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); + var captchaImage = await 
ExecuteAuth(new HttpRequest(captchaUrl)); + Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; + } + + // else + // { + // configData.CaptchaImage.Value = new byte[0]; + // } + // configData.CaptchaCookie.Value = loginPage.Cookies; + } + + // UpdateCookies(null, null); // var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "index.php") // { // LogResponseContent = true, diff --git a/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs b/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs index 7b03f66ee..3497be5aa 100644 --- a/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs +++ b/src/NzbDrone.Core/Indexers/Settings/UserPassCaptchaTorrentBaseSettings.cs @@ -1,26 +1,29 @@ +using System.Collections.Generic; using FluentValidation; using NzbDrone.Core.Annotations; using NzbDrone.Core.Validation; namespace NzbDrone.Core.Indexers.Settings { - public class UserPassTorrentBaseSettings : ITorrentIndexerSettings + public class UserPassCaptchaTorrentBaseSettings : ITorrentIndexerSettings { - public class UserPassBaseSettingsValidator : AbstractValidator + public class UserPassCaptchaBaseSettingsValidator : AbstractValidator { - public UserPassBaseSettingsValidator() + public UserPassCaptchaBaseSettingsValidator() { RuleFor(c => c.Username).NotEmpty(); RuleFor(c => c.Password).NotEmpty(); } } - private static readonly UserPassBaseSettingsValidator Validator = new UserPassBaseSettingsValidator(); + private static readonly UserPassCaptchaBaseSettingsValidator Validator = new UserPassCaptchaBaseSettingsValidator(); - public UserPassTorrentBaseSettings() + public UserPassCaptchaTorrentBaseSettings() { Username = ""; Password = ""; + Captcha = ""; + ExtraFieldData = new Dictionary(); } [FieldDefinition(1, Label = "Base Url", HelpText = "Select which baseurl Prowlarr will use for requests to the site", Type = FieldType.Select, SelectOptionsProviderAction = "getUrls")] @@ -32,12 +35,17 @@ public 
UserPassTorrentBaseSettings() [FieldDefinition(3, Label = "Password", HelpText = "Site Password", Privacy = PrivacyLevel.Password, Type = FieldType.Password)] public string Password { get; set; } - [FieldDefinition(4)] - public IndexerBaseSettings BaseSettings { get; set; } = new IndexerBaseSettings(); + [FieldDefinition(4, Label = "Captcha", HelpText = "Site Captcha", Privacy = PrivacyLevel.Normal, Type = FieldType.Captcha)] + public string Captcha { get; set; } [FieldDefinition(5)] + public IndexerBaseSettings BaseSettings { get; set; } = new IndexerBaseSettings(); + + [FieldDefinition(6)] public IndexerTorrentBaseSettings TorrentBaseSettings { get; set; } = new IndexerTorrentBaseSettings(); + public Dictionary ExtraFieldData { get; set; } + public NzbDroneValidationResult Validate() { return new NzbDroneValidationResult(Validator.Validate(this)); diff --git a/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs b/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs index 4761bdeac..7b03f66ee 100644 --- a/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs +++ b/src/NzbDrone.Core/Indexers/Settings/UserPassTorrentBaseSettings.cs @@ -4,9 +4,9 @@ namespace NzbDrone.Core.Indexers.Settings { - public class UserPassCaptchaTorrentBaseSettings : ITorrentIndexerSettings + public class UserPassTorrentBaseSettings : ITorrentIndexerSettings { - public class UserPassBaseSettingsValidator : AbstractValidator + public class UserPassBaseSettingsValidator : AbstractValidator { public UserPassBaseSettingsValidator() { @@ -17,11 +17,10 @@ public UserPassBaseSettingsValidator() private static readonly UserPassBaseSettingsValidator Validator = new UserPassBaseSettingsValidator(); - public UserPassCaptchaTorrentBaseSettings() + public UserPassTorrentBaseSettings() { Username = ""; Password = ""; - Captcha = ""; } [FieldDefinition(1, Label = "Base Url", HelpText = "Select which baseurl Prowlarr will use for requests to the site", Type = 
FieldType.Select, SelectOptionsProviderAction = "getUrls")] @@ -33,13 +32,10 @@ public UserPassCaptchaTorrentBaseSettings() [FieldDefinition(3, Label = "Password", HelpText = "Site Password", Privacy = PrivacyLevel.Password, Type = FieldType.Password)] public string Password { get; set; } - [FieldDefinition(4, Label = "Captcha", HelpText = "Site Captcha", Privacy = PrivacyLevel.Normal, Type = FieldType.Captcha)] - public string Captcha { get; set; } - - [FieldDefinition(5)] + [FieldDefinition(4)] public IndexerBaseSettings BaseSettings { get; set; } = new IndexerBaseSettings(); - [FieldDefinition(6)] + [FieldDefinition(5)] public IndexerTorrentBaseSettings TorrentBaseSettings { get; set; } = new IndexerTorrentBaseSettings(); public NzbDroneValidationResult Validate() From d7b2b3789172d5afd11096738b750f59fbceea05 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Sun, 5 Jun 2022 15:36:43 +0300 Subject: [PATCH 03/11] Finish login code for lostfilm --- .../Indexers/Definitions/Lostfilm.cs | 96 +++++++++++++++---- 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 87216ba9f..53c3517c8 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Collections.Specialized; +using System.Net.Http; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; @@ -60,23 +61,78 @@ protected override async Task DoLogin() { UpdateCookies(null, null); var loginPage = await ExecuteAuth(new HttpRequest(Settings.BaseUrl + "login")); + UpdateCookies(loginPage.GetCookies(), DateTime.Now + TimeSpan.FromDays(30)); - // UpdateCookies(loginPage.Cookies); - var parser = new HtmlParser(); - var document = parser.ParseDocument(loginPage.Content); - var qCaptchaImg = 
document.QuerySelector("img#captcha_pictcha"); - if (qCaptchaImg != null) - { - var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); - var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl)); - Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; - } - + // TODO: Finish captcha + // var parser = new HtmlParser(); + // var document = parser.ParseDocument(loginPage.Content); + // var qCaptchaImg = document.QuerySelector("img#captcha_pictcha"); + // if (qCaptchaImg != null) + // { + // var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); + // var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl)); + // Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; + // } // else // { // configData.CaptchaImage.Value = new byte[0]; // } - // configData.CaptchaCookie.Value = loginPage.Cookies; + if (!Settings.Username.Contains("@")) + { + throw new IndexerAuthException("Username must be an e-mail address"); + } + + var data = new Dictionary + { + { "act", "users" }, + { "type", "login" }, + { "mail", Settings.Username }, + { "pass", Settings.Password }, + { "rem", "1" } + }; + + if (!string.IsNullOrWhiteSpace(Settings.Captcha)) + { + data.Add("need_captcha", "1"); + data.Add("captcha", Settings.Captcha); + } + + var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "ajaxik.php") + { + LogResponseContent = true, + AllowAutoRedirect = true, + Method = HttpMethod.Post + }; + requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); + requestBuilder.SetCookies(loginPage.GetCookies()); + requestBuilder.SetHeader("Content-Type", "application/x-www-form-urlencoded"); + foreach (var item in data) + { + requestBuilder.AddFormParameter(item.Key, item.Value); + } + + var authLoginRequest = requestBuilder.Build(); + var response = await ExecuteAuth(authLoginRequest); + + if (response.Content != null && response.Content.Contains("\"success\":true")) + { + UpdateCookies(response.GetCookies(), 
DateTime.Now + TimeSpan.FromDays(30)); + } + else + { + var errorMessage = response.Content; + if (errorMessage.Contains("\"error\":2")) + { + errorMessage = "Captcha is incorrect"; + } + + if (errorMessage.Contains("\"error\":3")) + { + errorMessage = "E-mail or password is incorrect"; + } + + throw new IndexerAuthException(errorMessage); + } } // UpdateCookies(null, null); @@ -115,16 +171,16 @@ protected override async Task DoLogin() // throw new IndexerAuthException("Anidub authentication failed. Error: " + errorMessage); // } // } + protected override bool CheckIfLoginNeeded(HttpResponse httpResponse) + { + if (httpResponse.Content.Contains("href=\"/my\"")) + { + return false; + } - // protected override bool CheckIfLoginNeeded(HttpResponse httpResponse) - // { - // if (httpResponse.Content.Contains("index.php?action=logout")) - // { - // return false; - // } + return true; + } - // return true; - // } private IndexerCapabilities SetCapabilities() { var caps = new IndexerCapabilities From 84fcfa7912df1d04456339d5c372504b0dc29320 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Sun, 5 Jun 2022 20:59:22 +0300 Subject: [PATCH 04/11] WIP: new lostfilm releases --- .../Indexers/Definitions/Lostfilm.cs | 377 +++++++++++------- 1 file changed, 224 insertions(+), 153 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 53c3517c8..3d6a26c8f 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Collections.Specialized; +using System.Globalization; using System.Net.Http; using System.Text; using System.Text.RegularExpressions; @@ -17,6 +18,7 @@ using NzbDrone.Core.Messaging.Events; using NzbDrone.Core.Parser; using NzbDrone.Core.Parser.Model; +using NzbDrone.Core.ThingiProvider; using NzbDrone.Core.Validation; namespace 
NzbDrone.Core.Indexers.Definitions @@ -54,7 +56,7 @@ public override IIndexerRequestGenerator GetRequestGenerator() public override IParseIndexerResponse GetParser() { - return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger }; + return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger, Definition = Definition }; } protected override async Task DoLogin() @@ -69,9 +71,10 @@ protected override async Task DoLogin() // var qCaptchaImg = document.QuerySelector("img#captcha_pictcha"); // if (qCaptchaImg != null) // { - // var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); - // var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl)); - // Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; + // // var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); + // // var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl)); + // // Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; + // throw new IndexerAuthException("Captcha is not supported yet"); // } // else // { @@ -135,42 +138,6 @@ protected override async Task DoLogin() } } - // UpdateCookies(null, null); - // var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "index.php") - // { - // LogResponseContent = true, - // AllowAutoRedirect = true - // }; - - // var mainPage = await ExecuteAuth(new HttpRequest(Settings.BaseUrl)); - - // requestBuilder.Method = HttpMethod.Post; - // requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); - // requestBuilder.SetCookies(mainPage.GetCookies()); - - // var authLoginRequest = requestBuilder - // .AddFormParameter("login_name", Settings.Username) - // .AddFormParameter("login_password", Settings.Password) - // .AddFormParameter("login", "submit") - // .SetHeader("Content-Type", "application/x-www-form-urlencoded") - // .Build(); - - // var response = await ExecuteAuth(authLoginRequest); - 
- // if (response.Content != null && !CheckIfLoginNeeded(response)) - // { - // UpdateCookies(response.GetCookies(), DateTime.Now + TimeSpan.FromDays(30)); - // _logger.Debug("Anidub authentication succeeded"); - // } - // else - // { - // const string ErrorSelector = "#content .berror .berror_c"; - // var parser = new HtmlParser(); - // var document = await parser.ParseDocumentAsync(response.Content); - // var errorMessage = document.QuerySelector(ErrorSelector).TextContent.Trim(); - // throw new IndexerAuthException("Anidub authentication failed. Error: " + errorMessage); - // } - // } protected override bool CheckIfLoginNeeded(HttpResponse httpResponse) { if (httpResponse.Content.Contains("href=\"/my\"")) @@ -210,21 +177,26 @@ private IEnumerable GetPagedRequests(string term, int[] categori if (string.IsNullOrWhiteSpace(term)) { - requestUrl = Settings.BaseUrl; + requestUrl = Settings.BaseUrl + "new"; } else { - var queryCollection = new NameValueCollection - { - // Remove season and episode info from search term cause it breaks search - { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() }, - { "limit", "20" }, - { "orderby_sort", "entry_date|desc" } - }; - - requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString()); + throw new Exception("Lostfilm search not implemented"); } + // else + // { + // var queryCollection = new NameValueCollection + // { + // // Remove season and episode info from search term cause it breaks search + // { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() }, + // { "limit", "20" }, + // { "orderby_sort", "entry_date|desc" } + // }; + + // requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString()); + // } + // TODO: Implement searching var request = new IndexerRequest(requestUrl, HttpAccept.Html); yield return request; } @@ -276,14 +248,10 @@ public class 
LostfilmParser : IParseIndexerResponse { private readonly UserPassCaptchaTorrentBaseSettings _settings; private readonly IndexerCapabilitiesCategories _categories; - private static readonly Regex EpisodesInfoQueryRegex = new Regex(@"сери[ия] (\d+)(?:-(\d+))? из.*", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex ResolutionInfoQueryRegex = new Regex(@"качество (\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex SizeInfoQueryRegex = new Regex(@"размер:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex ReleaseDateInfoQueryRegex = new Regex(@"добавлен:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CategorieMovieRegex = new Regex(@"Фильм", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CategorieOVARegex = new Regex(@"ОВА|OVA|ОНА|ONA|Special", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CategorieDoramaRegex = new Regex(@"Дорама", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ParsePlayEpisodeRegex = new Regex("PlayEpisode\\('(?\\d{1,3})(?\\d{3})(?\\d{3})'\\)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ParseReleaseDetailsRegex = new Regex("Видео:\\ (?.+).\\ Размер:\\ (?.+).\\ Перевод", RegexOptions.Compiled | RegexOptions.IgnoreCase); public IIndexerHttpClient HttpClient { get; set; } + public ProviderDefinition Definition { get; set; } public Logger Logger { get; set; } public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabilitiesCategories categories) @@ -292,146 +260,225 @@ public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabi _categories = categories; } - private string composeTitle(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr) + internal class TrackerUrlDetails { - var 
name_ru = dom.QuerySelector("div.media__post__header > h1").TextContent.Trim(); - var name_en = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(1) > div > span").TextContent.Trim(); - var name_orig = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(2) > div > span").TextContent.Trim(); + internal string seriesId { get; private set; } + internal string season { get; private set; } + internal string episode { get; private set; } - var title = name_ru + " / " + name_en; - if (name_en != name_orig) + internal TrackerUrlDetails(string seriesId, string season, string episode) { - title += " / " + name_orig; + this.seriesId = seriesId; + this.season = season; + this.episode = episode; } - var tabName = t.TextContent; - tabName = tabName.Replace("Сезон", "Season"); - if (tabName.Contains("Серии")) + internal TrackerUrlDetails(AngleSharp.Dom.IElement button) { - tabName = ""; + var trigger = button.GetAttribute("onclick"); + var match = ParsePlayEpisodeRegex.Match(trigger); + + seriesId = match.Groups["id"].Value.TrimStart('0'); + season = match.Groups["season"].Value.TrimStart('0'); + episode = match.Groups["episode"].Value.TrimStart('0'); } - var heading = tr.QuerySelector("h3.tracker_info_bold").TextContent; - - // Parse episodes info from heading if episods info present - var match = EpisodesInfoQueryRegex.Match(heading); - heading = tabName; - if (match.Success) + // TODO: see if query.GetEpisodeString() is sufficient + internal string GetEpisodeString() { - if (string.IsNullOrEmpty(match.Groups[2].Value)) + var result = string.Empty; + + if (!string.IsNullOrEmpty(season) && season != "0" && season != "999") { - heading += " E" + match.Groups[1].Value; + result += "S" + season; + + if (!string.IsNullOrEmpty(episode) && episode != "0" && episode != "999") + { + result += "E" + episode; + } } - else + + return result; + } + } + + private IList FollowTrackerRedirection(string url, TrackerUrlDetails details) 
+ { + var req = new IndexerRequest(url, HttpAccept.Html); + var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + var releases = new List(); + + var parser = new HtmlParser(); + var document = parser.ParseDocument(results.Content); + var rows = document.QuerySelectorAll("div.inner-box--item"); + + var serieTitle = document.QuerySelector("div.inner-box--subtitle").TextContent; + serieTitle = serieTitle.Substring(0, serieTitle.LastIndexOf(',')); + + var episodeInfo = document.QuerySelector("div.inner-box--text").TextContent; + var episodeName = TrimString(episodeInfo, '(', ')'); + + foreach (var row in rows) + { + var detailsInfo = row.QuerySelector("div.inner-box--desc").TextContent; + var releaseDetails = ParseReleaseDetailsRegex.Match(detailsInfo); + + // ReSharper states "Expression is always false" + // TODO Refactor to get the intended operation + if (releaseDetails == null) { - heading += string.Format(" E{0}-{1}", match.Groups[1].Value, match.Groups[2].Value); + throw new FormatException("Failed to map release details string: " + detailsInfo); } + + /* + * For supported qualities see: + * - TvCategoryParser.cs + * - https://github.com/SickRage/SickRage/wiki/Quality-Settings#quality-names-to-recognize-the-quality-of-a-file + */ + var quality = releaseDetails.Groups["quality"].Value.Trim(); + + // Adapt shitty quality format for common algorythms + quality = Regex.Replace(quality, "-Rip", "Rip", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "HDTVRip", "HDTV", RegexOptions.IgnoreCase); + + // Fix forgotten p-Progressive suffix in resolution index + quality = Regex.Replace(quality, "1080 ", "1080p ", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "720 ", "720p ", RegexOptions.IgnoreCase); + + var techComponents = new[] + { + "rus", 
+ quality, + "(LostFilm)" + }; + var techInfo = string.Join(" ", techComponents); + + // Ru title: downloadLink.TextContent.Replace("\n", ""); + // En title should be manually constructed. + var titleComponents = new[] + { + serieTitle, details.GetEpisodeString(), episodeName, techInfo + }; + var downloadLink = row.QuerySelector("div.inner-box--link > a"); + var sizeString = releaseDetails.Groups["size"].Value.ToUpper(); + sizeString = sizeString.Replace("ТБ", "TB"); // untested + sizeString = sizeString.Replace("ГБ", "GB"); + sizeString = sizeString.Replace("МБ", "MB"); + sizeString = sizeString.Replace("КБ", "KB"); // untested + var link = downloadLink.GetAttribute("href"); + + // TODO this feels sparse compared to other trackers. Expand later + var release = new TorrentInfo + { + Categories = _categories.MapTrackerCatToNewznab("1"), + Title = string.Join(" - ", titleComponents), + DownloadUrl = link, + Guid = link, + Size = ParseUtil.GetBytes(sizeString), + + // add missing torznab fields not available from results + Seeders = 1, + Peers = 2, + DownloadVolumeFactor = 0, + UploadVolumeFactor = 1, + MinimumRatio = 1, + MinimumSeedTime = 172800 // 48 hours + }; + + releases.Add(release); } - return title + " - " + heading + " [" + getResolution(tr) + "p]"; + return releases; } - private string getResolution(AngleSharp.Dom.IElement tr) + private IList FetchTrackerReleases(TrackerUrlDetails details) { - var resolution = tr.QuerySelector("div.tracker_info_left").TextContent; - return ResolutionInfoQueryRegex.Match(resolution).Groups[1].Value; - } - - private long getReleaseSize(AngleSharp.Dom.IElement tr) - { - var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent; - return ParseUtil.GetBytes(SizeInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim()); - } - - private DateTime getReleaseDate(AngleSharp.Dom.IElement tr) - { - var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent; - return 
DateTime.Parse(ReleaseDateInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim()); - } - - private ICollection MapCategories(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr) - { - var rName = t.TextContent; - var rDesc = tr.QuerySelector("h3.tracker_info_bold").TextContent; - var type = dom.QuerySelector("div.releases-date:contains('Тип:')").TextContent; - - // Check OVA first cause OVA looks like anime with OVA in release name or description - if (CategorieOVARegex.IsMatch(rName) || CategorieOVARegex.IsMatch(rDesc)) + var queryCollection = new NameValueCollection { - return _categories.MapTrackerCatDescToNewznab("OVA/ONA/Special"); - } + { "c", details.seriesId }, + { "s", details.season }, + { "e", string.IsNullOrEmpty(details.episode) ? "999" : details.episode } // 999 is a synonym for the whole serie + }; + var url = _settings.BaseUrl + "v_search.php" + "?" + queryCollection.GetQueryString(); - // Check movies then, cause some of releases could be movies dorama and should go to movies category - if (CategorieMovieRegex.IsMatch(rName) || CategorieMovieRegex.IsMatch(rDesc)) + // Get redirection page with generated link on it. This link can't be constructed manually as it contains Hash field and hashing algo is unknown. + var req = new IndexerRequest(url, HttpAccept.Html); + var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + + if (results.Content == null) { - return _categories.MapTrackerCatDescToNewznab("Movies"); + throw new Exception("Empty response from " + url); } - // Check dorama. 
Most of doramas are flaged as doramas in type info, but type info could have a lot of types at same time (movie, etc) - if (CategorieDoramaRegex.IsMatch(rName) || CategorieDoramaRegex.IsMatch(type)) + if (results.Content == "log in first") { - return _categories.MapTrackerCatDescToNewznab("Dorama"); + throw new Exception(results.Content); } - return _categories.MapTrackerCatDescToNewznab("TV Anime"); + var parser = new HtmlParser(); + var document = parser.ParseDocument(results.Content); + var meta = document.QuerySelector("meta"); + var metaContent = meta.GetAttribute("content"); + + // Follow redirection defined by async url.replace + var redirectionUrl = metaContent.Substring(metaContent.IndexOf("http")); + return FollowTrackerRedirection(redirectionUrl, details); } private IList ParseRelease(IndexerResponse indexerResponse) { - var torrentInfos = new List(); + var releases = new List(); var parser = new HtmlParser(); var dom = parser.ParseDocument(indexerResponse.Content); - foreach (var t in dom.QuerySelectorAll("ul.media__tabs__nav > li > a")) + var playButton = dom.QuerySelector("div.external-btn"); + if (playButton != null && !playButton.ClassList.Contains("inactive")) { - var tr_id = t.Attributes["href"].Value; - var tr = dom.QuerySelector("div" + tr_id); - var seeders = int.Parse(tr.QuerySelector("div.circle_green_text_top").TextContent); - var url = indexerResponse.HttpRequest.Url.ToString(); + // var details = new Uri(url); + var dateString = dom.QuerySelector("div.title-block > div.details-pane > div.left-box").TextContent; + var key = dateString.Contains("TBA") ? "ru: " : "eng: "; + dateString = TrimString(dateString, key, " г."); // '... Дата выхода eng: 09 марта 2012 г. ...' -> '09 марта 2012' + DateTime date; - var release = new TorrentInfo - { - Title = composeTitle(dom, t, tr), - InfoUrl = url, - DownloadVolumeFactor = 0, - UploadVolumeFactor = 1, + //dateString might be just a year, e.g. 
https://www.lostfilm.tv/series/Ghosted/season_1/episode_14/ + if (dateString.Length == 4) + { + date = DateTime.ParseExact(dateString, "yyyy", CultureInfo.InvariantCulture).ToLocalTime(); + } + else + { + date = DateTime.Parse(dateString, new CultureInfo("ru-RU")); // dd mmmm yyyy + } - Guid = url + tr_id, - Seeders = seeders, - Peers = seeders + int.Parse(tr.QuerySelector("div.circle_red_text_top").TextContent), - Grabs = int.Parse(tr.QuerySelector("div.circle_grey_text_top").TextContent), - Categories = MapCategories(dom, t, tr), - PublishDate = getReleaseDate(tr), - DownloadUrl = tr.QuerySelector("div.download_tracker > a.btn__green").Attributes["href"].Value, - MagnetUrl = tr.QuerySelector("div.download_tracker > a.btn__d-gray").Attributes["href"].Value, - Size = getReleaseSize(tr), - Resolution = getResolution(tr) - }; - torrentInfos.Add(release); + var urlDetails = new TrackerUrlDetails(playButton); + var episodeReleases = FetchTrackerReleases(urlDetails); + + foreach (var release in episodeReleases) + { + release.InfoUrl = indexerResponse.Request.Url.ToString(); + release.PublishDate = date; + } + + releases.AddRange(episodeReleases); } - return torrentInfos; + return releases; } public IList ParseResponse(IndexerResponse indexerResponse) { - var torrentInfos = new List(); + var releases = new List(); var parser = new HtmlParser(); var dom = parser.ParseDocument(indexerResponse.Content); - var links = dom.QuerySelectorAll("a.ads-list__item__title"); - foreach (var link in links) + var rows = dom.QuerySelectorAll("div.row"); + foreach (var r in rows) { - var url = link.GetAttribute("href"); - - // Some URLs in search are broken - if (url.StartsWith("//")) - { - url = "https:" + url; - } - - var releaseRequest = new IndexerRequest(url, HttpAccept.Html); - var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.Execute(releaseRequest.HttpRequest)); + var link = r.QuerySelector("a").GetAttribute("href"); + var releaseRequest = new 
IndexerRequest(_settings.BaseUrl + link.TrimStart('/'), HttpAccept.Html); + var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.ExecuteProxied(releaseRequest.HttpRequest, Definition)); // Throw common http errors here before we try to parse if (releaseResponse.HttpResponse.HasHttpError) @@ -446,12 +493,36 @@ public IList ParseResponse(IndexerResponse indexerResponse) } } - torrentInfos.AddRange(ParseRelease(releaseResponse)); + releases.AddRange(ParseRelease(releaseResponse)); } - return torrentInfos.ToArray(); + return releases.ToArray(); } public Action, DateTime?> CookiesUpdater { get; set; } + + private string TrimString(string s, char startChar, char endChar) + { + var start = s.IndexOf(startChar); + var end = s.LastIndexOf(endChar); + return (start != -1 && end != -1) ? s.Substring(start + 1, end - start - 1) : null; + } + + private string TrimString(string s, string startString, string endString) + { + var start = s.IndexOf(startString); + var end = s.LastIndexOf(endString); + return (start != -1 && end != -1) ? s.Substring(start + startString.Length, end - start - startString.Length) : null; + } + + // private DateTime DateFromEpisodeColumn(AngleSharp.Dom.IElement dateColumn) + // { + // var dateString = dateColumn.QuerySelector("span.small-text")?.TextContent; + + // // 'Eng: 23.05.2017' -> '23.05.2017' OR '23.05.2017' -> '23.05.2017' + // dateString = string.IsNullOrEmpty(dateString) ? 
dateColumn.QuerySelector("span")?.TextContent : dateString.Substring(dateString.IndexOf(":") + 2); + // var date = DateTime.Parse(dateString, new CultureInfo("RU-ru")); // dd.mm.yyyy + // return date; + // } } } From cef74bd68058b1e7340cfbda1b42847d0f3d9f0f Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Sun, 5 Jun 2022 21:36:17 +0300 Subject: [PATCH 05/11] Finished working on fetching new releases --- .../Indexers/Definitions/Lostfilm.cs | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 3d6a26c8f..b0319a93c 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -56,7 +56,7 @@ public override IIndexerRequestGenerator GetRequestGenerator() public override IParseIndexerResponse GetParser() { - return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger, Definition = Definition }; + return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger, Definition = Definition, Indexer = this }; } protected override async Task DoLogin() @@ -184,19 +184,6 @@ private IEnumerable GetPagedRequests(string term, int[] categori throw new Exception("Lostfilm search not implemented"); } - // else - // { - // var queryCollection = new NameValueCollection - // { - // // Remove season and episode info from search term cause it breaks search - // { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() }, - // { "limit", "20" }, - // { "orderby_sort", "entry_date|desc" } - // }; - - // requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString()); - // } - // TODO: Implement searching var request = new IndexerRequest(requestUrl, HttpAccept.Html); yield return request; } @@ -252,6 +239,7 @@ public 
class LostfilmParser : IParseIndexerResponse private static readonly Regex ParseReleaseDetailsRegex = new Regex("Видео:\\ (?.+).\\ Размер:\\ (?.+).\\ Перевод", RegexOptions.Compiled | RegexOptions.IgnoreCase); public IIndexerHttpClient HttpClient { get; set; } public ProviderDefinition Definition { get; set; } + public Lostfilm Indexer { get; set; } public Logger Logger { get; set; } public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabilitiesCategories categories) @@ -404,7 +392,13 @@ private IList FetchTrackerReleases(TrackerUrlDetails details) var url = _settings.BaseUrl + "v_search.php" + "?" + queryCollection.GetQueryString(); // Get redirection page with generated link on it. This link can't be constructed manually as it contains Hash field and hashing algo is unknown. - var req = new IndexerRequest(url, HttpAccept.Html); + var requestBuilder = new HttpRequestBuilder(url) + { + AllowAutoRedirect = true, + }; + requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); + requestBuilder.SetCookies(Indexer.Cookies); + var req = new IndexerRequest(requestBuilder.Build()); var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); if (results.Content == null) From 1cc312471ae0b97324613e099c6bd83b2de03ba5 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Tue, 7 Jun 2022 00:55:06 +0300 Subject: [PATCH 06/11] WIP: search --- .../Indexers/Definitions/Lostfilm.cs | 182 ++++++++++++++++-- 1 file changed, 171 insertions(+), 11 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index b0319a93c..fd4c21112 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -2,12 +2,14 @@ using System.Collections.Generic; using System.Collections.Specialized; using System.Globalization; +using System.Linq; using System.Net.Http; using System.Text; using 
System.Text.RegularExpressions; using System.Threading.Tasks; using AngleSharp.Html.Parser; using FluentValidation; +using Newtonsoft.Json.Linq; using NLog; using NzbDrone.Common.Http; using NzbDrone.Core.Annotations; @@ -51,7 +53,7 @@ public Lostfilm(IIndexerHttpClient httpClient, IEventAggregator eventAggregator, public override IIndexerRequestGenerator GetRequestGenerator() { - return new LostfilmRequestGenerator() { Settings = Settings, Capabilities = Capabilities }; + return new LostfilmRequestGenerator() { Settings = Settings, Capabilities = Capabilities, HttpClient = _httpClient, Logger = _logger, Definition = Definition, Indexer = this }; } public override IParseIndexerResponse GetParser() @@ -166,33 +168,173 @@ public class LostfilmRequestGenerator : IIndexerRequestGenerator { public UserPassCaptchaTorrentBaseSettings Settings { get; set; } public IndexerCapabilities Capabilities { get; set; } + public IIndexerHttpClient HttpClient { get; set; } + public ProviderDefinition Definition { get; set; } + public Lostfilm Indexer { get; set; } + public Logger Logger { get; set; } public LostfilmRequestGenerator() { } - private IEnumerable GetPagedRequests(string term, int[] categories) + private IList GetSearchPageURLs(string term, int? season, string episode) { - var requestUrl = string.Empty; + var urls = new List(); + /* + Torznab query for some series could contains sanitized title. E.g. "Star Wars: The Clone Wars" will become "Star Wars The Clone Wars". + Search API on LostFilm.tv doesn't return anything on such search query so the query should be "morphed" even for "tvsearch" queries. + Also the queries to Specials is a union of Series and Episode titles. E.g.: "Breaking Bad - El Camino: A Breaking Bad Movie". + The algorythm works in the following way: + 1. Search with the full SearchTerm. Just for example, let's search for episode by it's name + - {Star Wars The Clone Wars To Catch a Jedi} + 2. 
[loop] If none were found, repeat search with SearchTerm reduced by 1 word from the end. Fail search if no words left and no results were obtained + - {Star Wars The Clone Wars To Catch a} Jedi + - {Star Wars The Clone Wars To Catch} a Jedi + - ... + - {Star Wars} The Clone Wars To Catch a Jedi + 3. When we got few results, try to filter them with the words excluded before + - [Star Wars: The Clone Wars, Star Wars Rebels, Star Wars: Forces of Destiny] + .filterBy(The Clone Wars To Catch a Jedi) + 4. [loop] Reduce filterTerm by 1 word from the end. Fail search if no words left and no results were obtained + .filterBy(The Clone Wars To Catch a) / Jedi + .filterBy(The Clone Wars To Catch) / a Jedi + ... + .filterBy(The Clone Wars) / To Catch a Jedi + 5. [loop] Now we know that series we're looking for is called "Star Wars The Clone Wars". Fetch series detail page for it and try to apply remaining words as episode filter, reducing filter by 1 word each time we get no results: + - .episodes().filteredBy(To Catch a Jedi) + - .episodes().filteredBy(To Catch a) / Jedi + - ... + - .episodes() / To Catch a Jedi + Test queries: + - "Star Wars The Clone Wars To Catch a Jedi" -> S05E19 + - "Breaking Bad El Camino A Breaking Bad Movie" -> Special + - "The Magicians (2015)" -> Year should be ignored + */ + + // Search query words. Consists of Series keywords that will be used for series search request, and Episode keywords that will be used for episode filtering. + var keywords = new List(term.Split(' ')); + + // Keywords count related to Series Search. + var searchKeywords = keywords.Count; + + // Keywords count related to Series Filter. 
+ var serieFilterKeywords = 0; + + // Overall (keywords.count - searchKeywords - serieFilterKeywords) are related to episode filter + do + { + var searchString = string.Join(" ", keywords.Take(searchKeywords)); + var data = new Dictionary + { + { "act", "common" }, + { "type", "search" }, + { "val", searchString } + }; + + var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "ajaxik.php"); + foreach (var item in data) + { + requestBuilder.AddFormParameter(item.Key, item.Value); + } + + requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); + requestBuilder.SetCookies(Indexer.Cookies); + var req = new IndexerRequest(requestBuilder.Build()); + var response = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + + if (response.Content == null) + { + continue; + } + + var json = JToken.Parse(response.Content); + if (json == null || json.Type == JTokenType.Array) + { + continue; // Search loop + } + + // Protect from {"data":false,"result":"ok"} + var jsonData = json["data"]; + if (jsonData.Type != JTokenType.Object) + { + continue; // Search loop + } + + var jsonSeries = jsonData["series"]; + if (jsonSeries == null || !jsonSeries.HasValues) + { + continue; // Search loop + } + + var series = jsonSeries.ToList(); + + // Filter found series + if (series.Count() > 1) + { + serieFilterKeywords = keywords.Count - searchKeywords; + + do + { + var serieFilter = string.Join(" ", keywords.GetRange(searchKeywords, serieFilterKeywords)); + var filteredSeries = series.Where(s => s["title_orig"].Value().Contains(serieFilter)).ToList(); + + if (filteredSeries.Count() > 0) + { + series = filteredSeries; + break; // Serie Filter loop + } + } + while (--serieFilterKeywords > 0); + } + + foreach (var serie in series) + { + var link = serie["link"].ToString(); + var season_url = (season == null) || (season == 0) ? 
"/seasons" : "/season_" + season.ToString(); + var url = Settings.BaseUrl + link.TrimStart('/') + season_url; + + if (!string.IsNullOrEmpty(episode)) + { + // Fetch single episode releases + // TODO: Add a togglable Quick Path via v_search.php in Indexer Settings + url += "/episode_" + episode; + } + + urls.Add(url); + } + } + while (--searchKeywords > 0); + + return urls; + } + + private IEnumerable GetPagedRequests(string term, int[] categories, int? season, string episode) + { + var requestUrls = new List(); if (string.IsNullOrWhiteSpace(term)) { - requestUrl = Settings.BaseUrl + "new"; + requestUrls.Add(Settings.BaseUrl + "new"); } else { - throw new Exception("Lostfilm search not implemented"); + requestUrls.AddRange(GetSearchPageURLs(term, season, episode)); } - var request = new IndexerRequest(requestUrl, HttpAccept.Html); - yield return request; + var requests = new List(); + foreach (var url in requestUrls) + { + requests.Add(new IndexerRequest(url, HttpAccept.Html)); + } + + yield return requests; } public IndexerPageableRequestChain GetSearchRequests(MovieSearchCriteria searchCriteria) { var pageableRequests = new IndexerPageableRequestChain(); - pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); + pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories, null, "")); return pageableRequests; } @@ -201,7 +343,7 @@ public IndexerPageableRequestChain GetSearchRequests(TvSearchCriteria searchCrit { var pageableRequests = new IndexerPageableRequestChain(); - pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedTvSearchString), searchCriteria.Categories)); + pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedTvSearchString), searchCriteria.Categories, searchCriteria.Season, searchCriteria.Episode)); return pageableRequests; } @@ -210,7 +352,7 @@ public 
IndexerPageableRequestChain GetSearchRequests(BasicSearchCriteria searchC { var pageableRequests = new IndexerPageableRequestChain(); - pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); + pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories, null, "")); return pageableRequests; } @@ -461,7 +603,7 @@ private IList ParseRelease(IndexerResponse indexerResponse) return releases; } - public IList ParseResponse(IndexerResponse indexerResponse) + private IList ParseNewResponse(IndexerResponse indexerResponse) { var releases = new List(); @@ -493,6 +635,24 @@ public IList ParseResponse(IndexerResponse indexerResponse) return releases.ToArray(); } + private IList ParseSearchResponse(IndexerResponse indexerResponse) + { + var releases = new List(); + return releases.ToArray(); + } + + public IList ParseResponse(IndexerResponse indexerResponse) + { + if (indexerResponse.Request.Url.Path == "/new") + { + return ParseNewResponse(indexerResponse); + } + else + { + return ParseSearchResponse(indexerResponse); + } + } + public Action, DateTime?> CookiesUpdater { get; set; } private string TrimString(string s, char startChar, char endChar) From 2aac15111256aed3f05a1bfa130f28612aecafc1 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Wed, 8 Jun 2022 00:15:40 +0300 Subject: [PATCH 07/11] WIP searching --- .../Indexers/Definitions/Lostfilm.cs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index fd4c21112..5b799bfa3 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -220,6 +220,8 @@ private IList GetSearchPageURLs(string term, int? season, string episode // Keywords count related to Series Filter. 
var serieFilterKeywords = 0; + Logger.Info(searchKeywords); + // Overall (keywords.count - searchKeywords - serieFilterKeywords) are related to episode filter do { @@ -231,7 +233,12 @@ private IList GetSearchPageURLs(string term, int? season, string episode { "val", searchString } }; - var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "ajaxik.php"); + Logger.Info(data); + + var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "ajaxik.php") + { + Method = HttpMethod.Post + }; foreach (var item in data) { requestBuilder.AddFormParameter(item.Key, item.Value); @@ -321,13 +328,10 @@ private IEnumerable GetPagedRequests(string term, int[] categori requestUrls.AddRange(GetSearchPageURLs(term, season, episode)); } - var requests = new List(); foreach (var url in requestUrls) { - requests.Add(new IndexerRequest(url, HttpAccept.Html)); + yield return new IndexerRequest(url, HttpAccept.Html); } - - yield return requests; } public IndexerPageableRequestChain GetSearchRequests(MovieSearchCriteria searchCriteria) @@ -335,7 +339,6 @@ public IndexerPageableRequestChain GetSearchRequests(MovieSearchCriteria searchC var pageableRequests = new IndexerPageableRequestChain(); pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories, null, "")); - return pageableRequests; } @@ -344,7 +347,6 @@ public IndexerPageableRequestChain GetSearchRequests(TvSearchCriteria searchCrit var pageableRequests = new IndexerPageableRequestChain(); pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedTvSearchString), searchCriteria.Categories, searchCriteria.Season, searchCriteria.Episode)); - return pageableRequests; } From e7d2f8c081fdfb833e1ebe2deaadf9e67761936b Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Wed, 8 Jun 2022 00:40:28 +0300 Subject: [PATCH 08/11] Search works for first result, but not parsed yet --- src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs | 14 
++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 5b799bfa3..7ee8f1904 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -61,6 +61,11 @@ public override IParseIndexerResponse GetParser() return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger, Definition = Definition, Indexer = this }; } + public IDictionary GetCookiesPublic() + { + return GetCookies(); + } + protected override async Task DoLogin() { UpdateCookies(null, null); @@ -220,8 +225,6 @@ private IList GetSearchPageURLs(string term, int? season, string episode // Keywords count related to Series Filter. var serieFilterKeywords = 0; - Logger.Info(searchKeywords); - // Overall (keywords.count - searchKeywords - serieFilterKeywords) are related to episode filter do { @@ -233,8 +236,6 @@ private IList GetSearchPageURLs(string term, int? season, string episode { "val", searchString } }; - Logger.Info(data); - var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "ajaxik.php") { Method = HttpMethod.Post @@ -245,7 +246,7 @@ private IList GetSearchPageURLs(string term, int? 
season, string episode } requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); - requestBuilder.SetCookies(Indexer.Cookies); + requestBuilder.SetCookies(Indexer.GetCookiesPublic()); var req = new IndexerRequest(requestBuilder.Build()); var response = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); @@ -328,6 +329,7 @@ private IEnumerable GetPagedRequests(string term, int[] categori requestUrls.AddRange(GetSearchPageURLs(term, season, episode)); } + Logger.Info(requestUrls.Count()); foreach (var url in requestUrls) { yield return new IndexerRequest(url, HttpAccept.Html); @@ -541,7 +543,7 @@ private IList FetchTrackerReleases(TrackerUrlDetails details) AllowAutoRedirect = true, }; requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); - requestBuilder.SetCookies(Indexer.Cookies); + requestBuilder.SetCookies(Indexer.GetCookiesPublic()); var req = new IndexerRequest(requestBuilder.Build()); var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); From e13b0b7a973429dde3c32a066f3ec8aca028a7d8 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Thu, 9 Jun 2022 18:45:18 +0300 Subject: [PATCH 09/11] WIP --- src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 7ee8f1904..79c233341 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -329,7 +329,6 @@ private IEnumerable GetPagedRequests(string term, int[] categori requestUrls.AddRange(GetSearchPageURLs(term, season, episode)); } - Logger.Info(requestUrls.Count()); foreach (var url in requestUrls) { yield return new IndexerRequest(url, HttpAccept.Html); @@ -651,10 +650,18 @@ public IList ParseResponse(IndexerResponse indexerResponse) { return 
ParseNewResponse(indexerResponse); } - else + + if (indexerResponse.Request.Url.Path.Contains("/episode_")) { - return ParseSearchResponse(indexerResponse); + return ParseNewResponse(indexerResponse); } + + if (indexerResponse.Request.Url.Path == "/new") + { + return ParseNewResponse(indexerResponse); + } + + return new List().ToArray(); } public Action, DateTime?> CookiesUpdater { get; set; } From ab207ac75be4c1761adad0ab7ffe678c8ab3b7d2 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Thu, 9 Jun 2022 19:07:09 +0300 Subject: [PATCH 10/11] Lostfilm parse /new page in IndexerRequestsGenerarator to parse in IndexerParser only releases pages --- .../Indexers/Definitions/Lostfilm.cs | 69 ++++++++----------- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 79c233341..4d9137914 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -182,8 +182,29 @@ public LostfilmRequestGenerator() { } - private IList GetSearchPageURLs(string term, int? season, string episode) + private IList GetNewReleaseURLs() { + Logger.Debug("GetNewReleaseURLs"); + var urls = new List(); + + var req = new IndexerRequest(Settings.BaseUrl + "new", HttpAccept.Html); + var response = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + + var parser = new HtmlParser(); + var dom = parser.ParseDocument(response.Content); + var rows = dom.QuerySelectorAll("div.row"); + foreach (var r in rows) + { + var path = r.QuerySelector("a").GetAttribute("href"); + urls.Add(Settings.BaseUrl + path.TrimStart('/')); + } + + return urls; + } + + private IList GetSearchReleaseURLs(string term, int? season, string episode) + { + Logger.Debug("GetSearchReleaseURLs"); var urls = new List(); /* Torznab query for some series could contains sanitized title. E.g. 
"Star Wars: The Clone Wars" will become "Star Wars The Clone Wars". @@ -247,8 +268,8 @@ private IList GetSearchPageURLs(string term, int? season, string episode requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); requestBuilder.SetCookies(Indexer.GetCookiesPublic()); - var req = new IndexerRequest(requestBuilder.Build()); - var response = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + var request = new IndexerRequest(requestBuilder.Build()); + var response = new IndexerResponse(request, HttpClient.ExecuteProxied(request.HttpRequest, Definition)); if (response.Content == null) { @@ -322,13 +343,14 @@ private IEnumerable GetPagedRequests(string term, int[] categori if (string.IsNullOrWhiteSpace(term)) { - requestUrls.Add(Settings.BaseUrl + "new"); + requestUrls.AddRange(GetNewReleaseURLs()); } else { - requestUrls.AddRange(GetSearchPageURLs(term, season, episode)); + requestUrls.AddRange(GetSearchReleaseURLs(term, season, episode)); } + Logger.Debug("GetPagedRequests: " + requestUrls.Count.ToString()); foreach (var url in requestUrls) { yield return new IndexerRequest(url, HttpAccept.Html); @@ -607,38 +629,6 @@ private IList ParseRelease(IndexerResponse indexerResponse) } private IList ParseNewResponse(IndexerResponse indexerResponse) - { - var releases = new List(); - - var parser = new HtmlParser(); - var dom = parser.ParseDocument(indexerResponse.Content); - var rows = dom.QuerySelectorAll("div.row"); - foreach (var r in rows) - { - var link = r.QuerySelector("a").GetAttribute("href"); - var releaseRequest = new IndexerRequest(_settings.BaseUrl + link.TrimStart('/'), HttpAccept.Html); - var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.ExecuteProxied(releaseRequest.HttpRequest, Definition)); - - // Throw common http errors here before we try to parse - if (releaseResponse.HttpResponse.HasHttpError) - { - if ((int)releaseResponse.HttpResponse.StatusCode == 429) - { - throw new 
TooManyRequestsException(releaseRequest.HttpRequest, releaseResponse.HttpResponse); - } - else - { - throw new IndexerException(releaseResponse, "Http error code: " + releaseResponse.HttpResponse.StatusCode); - } - } - - releases.AddRange(ParseRelease(releaseResponse)); - } - - return releases.ToArray(); - } - - private IList ParseSearchResponse(IndexerResponse indexerResponse) { var releases = new List(); return releases.ToArray(); @@ -646,11 +636,6 @@ private IList ParseSearchResponse(IndexerResponse indexerResponse) public IList ParseResponse(IndexerResponse indexerResponse) { - if (indexerResponse.Request.Url.Path == "/new") - { - return ParseNewResponse(indexerResponse); - } - if (indexerResponse.Request.Url.Path.Contains("/episode_")) { return ParseNewResponse(indexerResponse); From 8e1151f5c703814e6b2a36ddbb8f4b436a194701 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Thu, 9 Jun 2022 19:25:44 +0300 Subject: [PATCH 11/11] Added season page parsing --- .../Indexers/Definitions/Lostfilm.cs | 59 +++++++++++++------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 4d9137914..a59c0c63f 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -588,8 +588,9 @@ private IList FetchTrackerReleases(TrackerUrlDetails details) return FollowTrackerRedirection(redirectionUrl, details); } - private IList ParseRelease(IndexerResponse indexerResponse) + private IList ParseEpisodeResponse(IndexerResponse indexerResponse) { + Logger.Debug("ParsingEpisodeResponse: " + indexerResponse.Request.Url.ToString()); var releases = new List(); var parser = new HtmlParser(); var dom = parser.ParseDocument(indexerResponse.Content); @@ -625,12 +626,37 @@ private IList ParseRelease(IndexerResponse indexerResponse) releases.AddRange(episodeReleases); } - return releases; + return 
releases.ToArray(); } - private IList ParseNewResponse(IndexerResponse indexerResponse) + private IList ParseSeasonResponse(IndexerResponse indexerResponse) { - var releases = new List(); + Logger.Debug("ParsingSeasonResponse: " + indexerResponse.Request.Url.ToString()); + var releases = new List(); + var parser = new HtmlParser(); + var document = parser.ParseDocument(indexerResponse.Content); + var seasons = document.QuerySelectorAll("div.serie-block"); + var rowSelector = "table.movie-parts-list > tbody > tr"; + + foreach (var season in seasons) + { + // Could be null if serie-block is for Extras + var seasonButton = season.QuerySelector("div.movie-details-block > div.external-btn"); + + var lastEpisode = season.QuerySelector(rowSelector); + var dateColumn = lastEpisode.QuerySelector("td.delta"); + var date = DateFromEpisodeColumn(dateColumn); + + var urlDetails = new TrackerUrlDetails(seasonButton); + var seasonReleases = FetchTrackerReleases(urlDetails); + + foreach (var release in seasonReleases) + { + release.InfoUrl = indexerResponse.Request.Url.ToString(); + release.PublishDate = date; + } + } + return releases.ToArray(); } @@ -638,15 +664,10 @@ public IList ParseResponse(IndexerResponse indexerResponse) { if (indexerResponse.Request.Url.Path.Contains("/episode_")) { - return ParseNewResponse(indexerResponse); + return ParseEpisodeResponse(indexerResponse); } - if (indexerResponse.Request.Url.Path == "/new") - { - return ParseNewResponse(indexerResponse); - } - - return new List().ToArray(); + return ParseSeasonResponse(indexerResponse); } public Action, DateTime?> CookiesUpdater { get; set; } @@ -665,14 +686,14 @@ private string TrimString(string s, char startChar, char endChar) return (start != -1 && end != -1) ?
s.Substring(start + startString.Length, end - start - startString.Length) : null; } - // private DateTime DateFromEpisodeColumn(AngleSharp.Dom.IElement dateColumn) - // { - // var dateString = dateColumn.QuerySelector("span.small-text")?.TextContent; + private DateTime DateFromEpisodeColumn(AngleSharp.Dom.IElement dateColumn) + { + var dateString = dateColumn.QuerySelector("span.small-text")?.TextContent; - // // 'Eng: 23.05.2017' -> '23.05.2017' OR '23.05.2017' -> '23.05.2017' - // dateString = string.IsNullOrEmpty(dateString) ? dateColumn.QuerySelector("span")?.TextContent : dateString.Substring(dateString.IndexOf(":") + 2); - // var date = DateTime.Parse(dateString, new CultureInfo("RU-ru")); // dd.mm.yyyy - // return date; - // } + // 'Eng: 23.05.2017' -> '23.05.2017' OR '23.05.2017' -> '23.05.2017' + dateString = string.IsNullOrEmpty(dateString) ? dateColumn.QuerySelector("span")?.TextContent : dateString.Substring(dateString.IndexOf(":") + 2); + var date = DateTime.Parse(dateString, new CultureInfo("RU-ru")); // dd.mm.yyyy + return date; + } } }