From 84fcfa7912df1d04456339d5c372504b0dc29320 Mon Sep 17 00:00:00 2001 From: Dmitry Chepurovskiy Date: Sun, 5 Jun 2022 20:59:22 +0300 Subject: [PATCH] WIP: new lostfilm releases --- .../Indexers/Definitions/Lostfilm.cs | 377 +++++++++++------- 1 file changed, 224 insertions(+), 153 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs index 53c3517c8..3d6a26c8f 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Lostfilm.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Collections.Specialized; +using System.Globalization; using System.Net.Http; using System.Text; using System.Text.RegularExpressions; @@ -17,6 +18,7 @@ using NzbDrone.Core.Messaging.Events; using NzbDrone.Core.Parser; using NzbDrone.Core.Parser.Model; +using NzbDrone.Core.ThingiProvider; using NzbDrone.Core.Validation; namespace NzbDrone.Core.Indexers.Definitions @@ -54,7 +56,7 @@ public override IIndexerRequestGenerator GetRequestGenerator() public override IParseIndexerResponse GetParser() { - return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger }; + return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger, Definition = Definition }; } protected override async Task DoLogin() @@ -69,9 +71,10 @@ protected override async Task DoLogin() // var qCaptchaImg = document.QuerySelector("img#captcha_pictcha"); // if (qCaptchaImg != null) // { - // var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); - // var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl)); - // Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; + // // var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src"); + // // var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl)); + // // Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content; + // throw new IndexerAuthException("Captcha is not supported yet"); // } // else // { @@ -135,42 +138,6 @@ protected override async Task DoLogin() } } - // UpdateCookies(null, null); - // var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "index.php") - // { - // LogResponseContent = true, - // AllowAutoRedirect = true - // }; - - // var mainPage = await ExecuteAuth(new HttpRequest(Settings.BaseUrl)); - - // requestBuilder.Method = HttpMethod.Post; - // requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15); - // requestBuilder.SetCookies(mainPage.GetCookies()); - - // var authLoginRequest = requestBuilder - // .AddFormParameter("login_name", Settings.Username) - // .AddFormParameter("login_password", Settings.Password) - // .AddFormParameter("login", "submit") - // .SetHeader("Content-Type", "application/x-www-form-urlencoded") - // .Build(); - - // var response = await ExecuteAuth(authLoginRequest); - - // if (response.Content != null && !CheckIfLoginNeeded(response)) - // { - // UpdateCookies(response.GetCookies(), DateTime.Now + TimeSpan.FromDays(30)); - // _logger.Debug("Anidub authentication succeeded"); - // } - // else - // { - // const string ErrorSelector = "#content .berror .berror_c"; - // var parser = new HtmlParser(); - // var document = await parser.ParseDocumentAsync(response.Content); - // var errorMessage = document.QuerySelector(ErrorSelector).TextContent.Trim(); - // throw new IndexerAuthException("Anidub authentication failed. Error: " + errorMessage); - // } - // } protected override bool CheckIfLoginNeeded(HttpResponse httpResponse) { if (httpResponse.Content.Contains("href=\"/my\"")) @@ -210,21 +177,26 @@ private IEnumerable GetPagedRequests(string term, int[] categori if (string.IsNullOrWhiteSpace(term)) { - requestUrl = Settings.BaseUrl; + requestUrl = Settings.BaseUrl + "new"; } else { - var queryCollection = new NameValueCollection - { - // Remove season and episode info from search term cause it breaks search - { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() }, - { "limit", "20" }, - { "orderby_sort", "entry_date|desc" } - }; - - requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString()); + throw new Exception("Lostfilm search not implemented"); } + // else + // { + // var queryCollection = new NameValueCollection + // { + // // Remove season and episode info from search term cause it breaks search + // { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() }, + // { "limit", "20" }, + // { "orderby_sort", "entry_date|desc" } + // }; + + // requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString()); + // } + // TODO: Implement searching var request = new IndexerRequest(requestUrl, HttpAccept.Html); yield return request; } @@ -276,14 +248,10 @@ public class LostfilmParser : IParseIndexerResponse { private readonly UserPassCaptchaTorrentBaseSettings _settings; private readonly IndexerCapabilitiesCategories _categories; - private static readonly Regex EpisodesInfoQueryRegex = new Regex(@"сери[ия] (\d+)(?:-(\d+))? из.*", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex ResolutionInfoQueryRegex = new Regex(@"качество (\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex SizeInfoQueryRegex = new Regex(@"размер:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex ReleaseDateInfoQueryRegex = new Regex(@"добавлен:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CategorieMovieRegex = new Regex(@"Фильм", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CategorieOVARegex = new Regex(@"ОВА|OVA|ОНА|ONA|Special", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private static readonly Regex CategorieDoramaRegex = new Regex(@"Дорама", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ParsePlayEpisodeRegex = new Regex("PlayEpisode\\('(?\\d{1,3})(?\\d{3})(?\\d{3})'\\)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private static readonly Regex ParseReleaseDetailsRegex = new Regex("Видео:\\ (?.+).\\ Размер:\\ (?.+).\\ Перевод", RegexOptions.Compiled | RegexOptions.IgnoreCase); public IIndexerHttpClient HttpClient { get; set; } + public ProviderDefinition Definition { get; set; } public Logger Logger { get; set; } public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabilitiesCategories categories) @@ -292,146 +260,225 @@ public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabi _categories = categories; } - private string composeTitle(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr) + internal class TrackerUrlDetails { - var name_ru = dom.QuerySelector("div.media__post__header > h1").TextContent.Trim(); - var name_en = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(1) > div > span").TextContent.Trim(); - var name_orig = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(2) > div > span").TextContent.Trim(); + internal string seriesId { get; private set; } + internal string season { get; private set; } + internal string episode { get; private set; } - var title = name_ru + " / " + name_en; - if (name_en != name_orig) + internal TrackerUrlDetails(string seriesId, string season, string episode) { - title += " / " + name_orig; + this.seriesId = seriesId; + this.season = season; + this.episode = episode; } - var tabName = t.TextContent; - tabName = tabName.Replace("Сезон", "Season"); - if (tabName.Contains("Серии")) + internal TrackerUrlDetails(AngleSharp.Dom.IElement button) { - tabName = ""; + var trigger = button.GetAttribute("onclick"); + var match = ParsePlayEpisodeRegex.Match(trigger); + + seriesId = match.Groups["id"].Value.TrimStart('0'); + season = match.Groups["season"].Value.TrimStart('0'); + episode = match.Groups["episode"].Value.TrimStart('0'); } - var heading = tr.QuerySelector("h3.tracker_info_bold").TextContent; - - // Parse episodes info from heading if episods info present - var match = EpisodesInfoQueryRegex.Match(heading); - heading = tabName; - if (match.Success) + // TODO: see if query.GetEpisodeString() is sufficient + internal string GetEpisodeString() { - if (string.IsNullOrEmpty(match.Groups[2].Value)) + var result = string.Empty; + + if (!string.IsNullOrEmpty(season) && season != "0" && season != "999") { - heading += " E" + match.Groups[1].Value; + result += "S" + season; + + if (!string.IsNullOrEmpty(episode) && episode != "0" && episode != "999") + { + result += "E" + episode; + } } - else + + return result; + } + } + + private IList FollowTrackerRedirection(string url, TrackerUrlDetails details) + { + var req = new IndexerRequest(url, HttpAccept.Html); + var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + var releases = new List(); + + var parser = new HtmlParser(); + var document = parser.ParseDocument(results.Content); + var rows = document.QuerySelectorAll("div.inner-box--item"); + + var serieTitle = document.QuerySelector("div.inner-box--subtitle").TextContent; + serieTitle = serieTitle.Substring(0, serieTitle.LastIndexOf(',')); + + var episodeInfo = document.QuerySelector("div.inner-box--text").TextContent; + var episodeName = TrimString(episodeInfo, '(', ')'); + + foreach (var row in rows) + { + var detailsInfo = row.QuerySelector("div.inner-box--desc").TextContent; + var releaseDetails = ParseReleaseDetailsRegex.Match(detailsInfo); + + // ReSharper states "Expression is always false" + // TODO Refactor to get the intended operation + if (releaseDetails == null) { - heading += string.Format(" E{0}-{1}", match.Groups[1].Value, match.Groups[2].Value); + throw new FormatException("Failed to map release details string: " + detailsInfo); } + + /* + * For supported qualities see: + * - TvCategoryParser.cs + * - https://github.com/SickRage/SickRage/wiki/Quality-Settings#quality-names-to-recognize-the-quality-of-a-file + */ + var quality = releaseDetails.Groups["quality"].Value.Trim(); + + // Adapt shitty quality format for common algorythms + quality = Regex.Replace(quality, "-Rip", "Rip", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "HDTVRip", "HDTV", RegexOptions.IgnoreCase); + + // Fix forgotten p-Progressive suffix in resolution index + quality = Regex.Replace(quality, "1080 ", "1080p ", RegexOptions.IgnoreCase); + quality = Regex.Replace(quality, "720 ", "720p ", RegexOptions.IgnoreCase); + + var techComponents = new[] + { + "rus", + quality, + "(LostFilm)" + }; + var techInfo = string.Join(" ", techComponents); + + // Ru title: downloadLink.TextContent.Replace("\n", ""); + // En title should be manually constructed. + var titleComponents = new[] + { + serieTitle, details.GetEpisodeString(), episodeName, techInfo + }; + var downloadLink = row.QuerySelector("div.inner-box--link > a"); + var sizeString = releaseDetails.Groups["size"].Value.ToUpper(); + sizeString = sizeString.Replace("ТБ", "TB"); // untested + sizeString = sizeString.Replace("ГБ", "GB"); + sizeString = sizeString.Replace("МБ", "MB"); + sizeString = sizeString.Replace("КБ", "KB"); // untested + var link = downloadLink.GetAttribute("href"); + + // TODO this feels sparse compared to other trackers. Expand later + var release = new TorrentInfo + { + Categories = _categories.MapTrackerCatToNewznab("1"), + Title = string.Join(" - ", titleComponents), + DownloadUrl = link, + Guid = link, + Size = ParseUtil.GetBytes(sizeString), + + // add missing torznab fields not available from results + Seeders = 1, + Peers = 2, + DownloadVolumeFactor = 0, + UploadVolumeFactor = 1, + MinimumRatio = 1, + MinimumSeedTime = 172800 // 48 hours + }; + + releases.Add(release); } - return title + " - " + heading + " [" + getResolution(tr) + "p]"; + return releases; } - private string getResolution(AngleSharp.Dom.IElement tr) + private IList FetchTrackerReleases(TrackerUrlDetails details) { - var resolution = tr.QuerySelector("div.tracker_info_left").TextContent; - return ResolutionInfoQueryRegex.Match(resolution).Groups[1].Value; - } - - private long getReleaseSize(AngleSharp.Dom.IElement tr) - { - var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent; - return ParseUtil.GetBytes(SizeInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim()); - } - - private DateTime getReleaseDate(AngleSharp.Dom.IElement tr) - { - var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent; - return DateTime.Parse(ReleaseDateInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim()); - } - - private ICollection MapCategories(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr) - { - var rName = t.TextContent; - var rDesc = tr.QuerySelector("h3.tracker_info_bold").TextContent; - var type = dom.QuerySelector("div.releases-date:contains('Тип:')").TextContent; - - // Check OVA first cause OVA looks like anime with OVA in release name or description - if (CategorieOVARegex.IsMatch(rName) || CategorieOVARegex.IsMatch(rDesc)) + var queryCollection = new NameValueCollection { - return _categories.MapTrackerCatDescToNewznab("OVA/ONA/Special"); - } + { "c", details.seriesId }, + { "s", details.season }, + { "e", string.IsNullOrEmpty(details.episode) ? "999" : details.episode } // 999 is a synonym for the whole serie + }; + var url = _settings.BaseUrl + "v_search.php" + "?" + queryCollection.GetQueryString(); - // Check movies then, cause some of releases could be movies dorama and should go to movies category - if (CategorieMovieRegex.IsMatch(rName) || CategorieMovieRegex.IsMatch(rDesc)) + // Get redirection page with generated link on it. This link can't be constructed manually as it contains Hash field and hashing algo is unknown. + var req = new IndexerRequest(url, HttpAccept.Html); + var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition)); + + if (results.Content == null) { - return _categories.MapTrackerCatDescToNewznab("Movies"); + throw new Exception("Empty response from " + url); } - // Check dorama. Most of doramas are flaged as doramas in type info, but type info could have a lot of types at same time (movie, etc) - if (CategorieDoramaRegex.IsMatch(rName) || CategorieDoramaRegex.IsMatch(type)) + if (results.Content == "log in first") { - return _categories.MapTrackerCatDescToNewznab("Dorama"); + throw new Exception(results.Content); } - return _categories.MapTrackerCatDescToNewznab("TV Anime"); + var parser = new HtmlParser(); + var document = parser.ParseDocument(results.Content); + var meta = document.QuerySelector("meta"); + var metaContent = meta.GetAttribute("content"); + + // Follow redirection defined by async url.replace + var redirectionUrl = metaContent.Substring(metaContent.IndexOf("http")); + return FollowTrackerRedirection(redirectionUrl, details); } private IList ParseRelease(IndexerResponse indexerResponse) { - var torrentInfos = new List(); + var releases = new List(); var parser = new HtmlParser(); var dom = parser.ParseDocument(indexerResponse.Content); - foreach (var t in dom.QuerySelectorAll("ul.media__tabs__nav > li > a")) + var playButton = dom.QuerySelector("div.external-btn"); + if (playButton != null && !playButton.ClassList.Contains("inactive")) { - var tr_id = t.Attributes["href"].Value; - var tr = dom.QuerySelector("div" + tr_id); - var seeders = int.Parse(tr.QuerySelector("div.circle_green_text_top").TextContent); - var url = indexerResponse.HttpRequest.Url.ToString(); + // var details = new Uri(url); + var dateString = dom.QuerySelector("div.title-block > div.details-pane > div.left-box").TextContent; + var key = dateString.Contains("TBA") ? "ru: " : "eng: "; + dateString = TrimString(dateString, key, " г."); // '... Дата выхода eng: 09 марта 2012 г. ...' -> '09 марта 2012' + DateTime date; - var release = new TorrentInfo - { - Title = composeTitle(dom, t, tr), - InfoUrl = url, - DownloadVolumeFactor = 0, - UploadVolumeFactor = 1, + //dateString might be just a year, e.g. https://www.lostfilm.tv/series/Ghosted/season_1/episode_14/ + if (dateString.Length == 4) + { + date = DateTime.ParseExact(dateString, "yyyy", CultureInfo.InvariantCulture).ToLocalTime(); + } + else + { + date = DateTime.Parse(dateString, new CultureInfo("ru-RU")); // dd mmmm yyyy + } - Guid = url + tr_id, - Seeders = seeders, - Peers = seeders + int.Parse(tr.QuerySelector("div.circle_red_text_top").TextContent), - Grabs = int.Parse(tr.QuerySelector("div.circle_grey_text_top").TextContent), - Categories = MapCategories(dom, t, tr), - PublishDate = getReleaseDate(tr), - DownloadUrl = tr.QuerySelector("div.download_tracker > a.btn__green").Attributes["href"].Value, - MagnetUrl = tr.QuerySelector("div.download_tracker > a.btn__d-gray").Attributes["href"].Value, - Size = getReleaseSize(tr), - Resolution = getResolution(tr) - }; - torrentInfos.Add(release); + var urlDetails = new TrackerUrlDetails(playButton); + var episodeReleases = FetchTrackerReleases(urlDetails); + + foreach (var release in episodeReleases) + { + release.InfoUrl = indexerResponse.Request.Url.ToString(); + release.PublishDate = date; + } + + releases.AddRange(episodeReleases); } - return torrentInfos; + return releases; } public IList ParseResponse(IndexerResponse indexerResponse) { - var torrentInfos = new List(); + var releases = new List(); var parser = new HtmlParser(); var dom = parser.ParseDocument(indexerResponse.Content); - var links = dom.QuerySelectorAll("a.ads-list__item__title"); - foreach (var link in links) + var rows = dom.QuerySelectorAll("div.row"); + foreach (var r in rows) { - var url = link.GetAttribute("href"); - - // Some URLs in search are broken - if (url.StartsWith("//")) - { - url = "https:" + url; - } - - var releaseRequest = new IndexerRequest(url, HttpAccept.Html); - var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.Execute(releaseRequest.HttpRequest)); + var link = r.QuerySelector("a").GetAttribute("href"); + var releaseRequest = new IndexerRequest(_settings.BaseUrl + link.TrimStart('/'), HttpAccept.Html); + var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.ExecuteProxied(releaseRequest.HttpRequest, Definition)); // Throw common http errors here before we try to parse if (releaseResponse.HttpResponse.HasHttpError) @@ -446,12 +493,36 @@ public IList ParseResponse(IndexerResponse indexerResponse) } } - torrentInfos.AddRange(ParseRelease(releaseResponse)); + releases.AddRange(ParseRelease(releaseResponse)); } - return torrentInfos.ToArray(); + return releases.ToArray(); } public Action, DateTime?> CookiesUpdater { get; set; } + + private string TrimString(string s, char startChar, char endChar) + { + var start = s.IndexOf(startChar); + var end = s.LastIndexOf(endChar); + return (start != -1 && end != -1) ? s.Substring(start + 1, end - start - 1) : null; + } + + private string TrimString(string s, string startString, string endString) + { + var start = s.IndexOf(startString); + var end = s.LastIndexOf(endString); + return (start != -1 && end != -1) ? s.Substring(start + startString.Length, end - start - startString.Length) : null; + } + + // private DateTime DateFromEpisodeColumn(AngleSharp.Dom.IElement dateColumn) + // { + // var dateString = dateColumn.QuerySelector("span.small-text")?.TextContent; + + // // 'Eng: 23.05.2017' -> '23.05.2017' OR '23.05.2017' -> '23.05.2017' + // dateString = string.IsNullOrEmpty(dateString) ? dateColumn.QuerySelector("span")?.TextContent : dateString.Substring(dateString.IndexOf(":") + 2); + // var date = DateTime.Parse(dateString, new CultureInfo("RU-ru")); // dd.mm.yyyy + // return date; + // } } }