WIP: new lostfilm releases

This commit is contained in:
Dmitry Chepurovskiy 2022-06-05 20:59:22 +03:00
parent d7b2b37891
commit 84fcfa7912
No known key found for this signature in database
GPG key ID: 5B3A5FDCBFF9B3A4

View file

@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Globalization;
using System.Net.Http;
using System.Text;
using System.Text.RegularExpressions;
@ -17,6 +18,7 @@
using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Parser.Model;
using NzbDrone.Core.ThingiProvider;
using NzbDrone.Core.Validation;
namespace NzbDrone.Core.Indexers.Definitions
@ -54,7 +56,7 @@ public override IIndexerRequestGenerator GetRequestGenerator()
public override IParseIndexerResponse GetParser()
{
return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger };
return new LostfilmParser(Settings, Capabilities.Categories) { HttpClient = _httpClient, Logger = _logger, Definition = Definition };
}
protected override async Task DoLogin()
@ -69,9 +71,10 @@ protected override async Task DoLogin()
// var qCaptchaImg = document.QuerySelector("img#captcha_pictcha");
// if (qCaptchaImg != null)
// {
// var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src");
// var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl));
// Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content;
// // var captchaUrl = Settings.BaseUrl + qCaptchaImg.GetAttribute("src");
// // var captchaImage = await ExecuteAuth(new HttpRequest(captchaUrl));
// // Settings.ExtraFieldData["CAPTCHA"] = captchaImage.Content;
// throw new IndexerAuthException("Captcha is not supported yet");
// }
// else
// {
@ -135,42 +138,6 @@ protected override async Task DoLogin()
}
}
// UpdateCookies(null, null);
// var requestBuilder = new HttpRequestBuilder(Settings.BaseUrl + "index.php")
// {
// LogResponseContent = true,
// AllowAutoRedirect = true
// };
// var mainPage = await ExecuteAuth(new HttpRequest(Settings.BaseUrl));
// requestBuilder.Method = HttpMethod.Post;
// requestBuilder.PostProcess += r => r.RequestTimeout = TimeSpan.FromSeconds(15);
// requestBuilder.SetCookies(mainPage.GetCookies());
// var authLoginRequest = requestBuilder
// .AddFormParameter("login_name", Settings.Username)
// .AddFormParameter("login_password", Settings.Password)
// .AddFormParameter("login", "submit")
// .SetHeader("Content-Type", "application/x-www-form-urlencoded")
// .Build();
// var response = await ExecuteAuth(authLoginRequest);
// if (response.Content != null && !CheckIfLoginNeeded(response))
// {
// UpdateCookies(response.GetCookies(), DateTime.Now + TimeSpan.FromDays(30));
// _logger.Debug("Anidub authentication succeeded");
// }
// else
// {
// const string ErrorSelector = "#content .berror .berror_c";
// var parser = new HtmlParser();
// var document = await parser.ParseDocumentAsync(response.Content);
// var errorMessage = document.QuerySelector(ErrorSelector).TextContent.Trim();
// throw new IndexerAuthException("Anidub authentication failed. Error: " + errorMessage);
// }
// }
protected override bool CheckIfLoginNeeded(HttpResponse httpResponse)
{
if (httpResponse.Content.Contains("href=\"/my\""))
@ -210,21 +177,26 @@ private IEnumerable<IndexerRequest> GetPagedRequests(string term, int[] categori
if (string.IsNullOrWhiteSpace(term))
{
requestUrl = Settings.BaseUrl;
requestUrl = Settings.BaseUrl + "new";
}
else
{
var queryCollection = new NameValueCollection
{
// Remove season and episode info from search term cause it breaks search
{ "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() },
{ "limit", "20" },
{ "orderby_sort", "entry_date|desc" }
};
requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString());
throw new Exception("Lostfilm search not implemented");
}
// else
// {
// var queryCollection = new NameValueCollection
// {
// // Remove season and episode info from search term cause it breaks search
// { "keywords", Regex.Replace(term, @"(?:[SsEe]?\d{1,4}){1,2}$", "").TrimEnd() },
// { "limit", "20" },
// { "orderby_sort", "entry_date|desc" }
// };
// requestUrl = string.Format("{0}/ajax/search_result/P0?{1}", Settings.BaseUrl.TrimEnd('/'), queryCollection.GetQueryString());
// }
// TODO: Implement searching
var request = new IndexerRequest(requestUrl, HttpAccept.Html);
yield return request;
}
@ -276,14 +248,10 @@ public class LostfilmParser : IParseIndexerResponse
{
private readonly UserPassCaptchaTorrentBaseSettings _settings;
private readonly IndexerCapabilitiesCategories _categories;
private static readonly Regex EpisodesInfoQueryRegex = new Regex(@"сери[ия] (\d+)(?:-(\d+))? из.*", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex ResolutionInfoQueryRegex = new Regex(@"качество (\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex SizeInfoQueryRegex = new Regex(@"размер:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex ReleaseDateInfoQueryRegex = new Regex(@"добавлен:(.*)\n", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex CategorieMovieRegex = new Regex(@"Фильм", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex CategorieOVARegex = new Regex(@"ОВА|OVA|ОНА|ONA|Special", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex CategorieDoramaRegex = new Regex(@"Дорама", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex ParsePlayEpisodeRegex = new Regex("PlayEpisode\\('(?<id>\\d{1,3})(?<season>\\d{3})(?<episode>\\d{3})'\\)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex ParseReleaseDetailsRegex = new Regex("Видео:\\ (?<quality>.+).\\ Размер:\\ (?<size>.+).\\ Перевод", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public IIndexerHttpClient HttpClient { get; set; }
public ProviderDefinition Definition { get; set; }
public Logger Logger { get; set; }
public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabilitiesCategories categories)
@ -292,146 +260,225 @@ public LostfilmParser(UserPassCaptchaTorrentBaseSettings settings, IndexerCapabi
_categories = categories;
}
private string composeTitle(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr)
internal class TrackerUrlDetails
{
var name_ru = dom.QuerySelector("div.media__post__header > h1").TextContent.Trim();
var name_en = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(1) > div > span").TextContent.Trim();
var name_orig = dom.QuerySelector("div.media__panel > div:nth-of-type(1) > div.col-l:nth-of-type(2) > div > span").TextContent.Trim();
internal string seriesId { get; private set; }
internal string season { get; private set; }
internal string episode { get; private set; }
var title = name_ru + " / " + name_en;
if (name_en != name_orig)
internal TrackerUrlDetails(string seriesId, string season, string episode)
{
title += " / " + name_orig;
this.seriesId = seriesId;
this.season = season;
this.episode = episode;
}
var tabName = t.TextContent;
tabName = tabName.Replace("Сезон", "Season");
if (tabName.Contains("Серии"))
internal TrackerUrlDetails(AngleSharp.Dom.IElement button)
{
tabName = "";
var trigger = button.GetAttribute("onclick");
var match = ParsePlayEpisodeRegex.Match(trigger);
seriesId = match.Groups["id"].Value.TrimStart('0');
season = match.Groups["season"].Value.TrimStart('0');
episode = match.Groups["episode"].Value.TrimStart('0');
}
var heading = tr.QuerySelector("h3.tracker_info_bold").TextContent;
// Parse episodes info from heading if episods info present
var match = EpisodesInfoQueryRegex.Match(heading);
heading = tabName;
if (match.Success)
// TODO: see if query.GetEpisodeString() is sufficient
internal string GetEpisodeString()
{
if (string.IsNullOrEmpty(match.Groups[2].Value))
var result = string.Empty;
if (!string.IsNullOrEmpty(season) && season != "0" && season != "999")
{
heading += " E" + match.Groups[1].Value;
result += "S" + season;
if (!string.IsNullOrEmpty(episode) && episode != "0" && episode != "999")
{
result += "E" + episode;
}
}
else
return result;
}
}
private IList<TorrentInfo> FollowTrackerRedirection(string url, TrackerUrlDetails details)
{
var req = new IndexerRequest(url, HttpAccept.Html);
var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition));
var releases = new List<TorrentInfo>();
var parser = new HtmlParser();
var document = parser.ParseDocument(results.Content);
var rows = document.QuerySelectorAll("div.inner-box--item");
var serieTitle = document.QuerySelector("div.inner-box--subtitle").TextContent;
serieTitle = serieTitle.Substring(0, serieTitle.LastIndexOf(','));
var episodeInfo = document.QuerySelector("div.inner-box--text").TextContent;
var episodeName = TrimString(episodeInfo, '(', ')');
foreach (var row in rows)
{
var detailsInfo = row.QuerySelector("div.inner-box--desc").TextContent;
var releaseDetails = ParseReleaseDetailsRegex.Match(detailsInfo);
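// ReSharper states "Expression is always false": Regex.Match never returns null, it returns a Match whose Success is false.
// TODO Refactor to check releaseDetails.Success to get the intended operation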
if (releaseDetails == null)
{
heading += string.Format(" E{0}-{1}", match.Groups[1].Value, match.Groups[2].Value);
throw new FormatException("Failed to map release details string: " + detailsInfo);
}
/*
* For supported qualities see:
* - TvCategoryParser.cs
* - https://github.com/SickRage/SickRage/wiki/Quality-Settings#quality-names-to-recognize-the-quality-of-a-file
*/
var quality = releaseDetails.Groups["quality"].Value.Trim();
// Adapt the tracker's non-standard quality format for common parsing algorithms
quality = Regex.Replace(quality, "-Rip", "Rip", RegexOptions.IgnoreCase);
quality = Regex.Replace(quality, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase);
quality = Regex.Replace(quality, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase);
quality = Regex.Replace(quality, "HDTVRip", "HDTV", RegexOptions.IgnoreCase);
// Fix forgotten p-Progressive suffix in resolution index
quality = Regex.Replace(quality, "1080 ", "1080p ", RegexOptions.IgnoreCase);
quality = Regex.Replace(quality, "720 ", "720p ", RegexOptions.IgnoreCase);
var techComponents = new[]
{
"rus",
quality,
"(LostFilm)"
};
var techInfo = string.Join(" ", techComponents);
// Ru title: downloadLink.TextContent.Replace("\n", "");
// En title should be manually constructed.
var titleComponents = new[]
{
serieTitle, details.GetEpisodeString(), episodeName, techInfo
};
var downloadLink = row.QuerySelector("div.inner-box--link > a");
var sizeString = releaseDetails.Groups["size"].Value.ToUpper();
sizeString = sizeString.Replace("ТБ", "TB"); // untested
sizeString = sizeString.Replace("ГБ", "GB");
sizeString = sizeString.Replace("МБ", "MB");
sizeString = sizeString.Replace("КБ", "KB"); // untested
var link = downloadLink.GetAttribute("href");
// TODO this feels sparse compared to other trackers. Expand later
var release = new TorrentInfo
{
Categories = _categories.MapTrackerCatToNewznab("1"),
Title = string.Join(" - ", titleComponents),
DownloadUrl = link,
Guid = link,
Size = ParseUtil.GetBytes(sizeString),
// add missing torznab fields not available from results
Seeders = 1,
Peers = 2,
DownloadVolumeFactor = 0,
UploadVolumeFactor = 1,
MinimumRatio = 1,
MinimumSeedTime = 172800 // 48 hours
};
releases.Add(release);
}
return title + " - " + heading + " [" + getResolution(tr) + "p]";
return releases;
}
private string getResolution(AngleSharp.Dom.IElement tr)
private IList<TorrentInfo> FetchTrackerReleases(TrackerUrlDetails details)
{
var resolution = tr.QuerySelector("div.tracker_info_left").TextContent;
return ResolutionInfoQueryRegex.Match(resolution).Groups[1].Value;
}
private long getReleaseSize(AngleSharp.Dom.IElement tr)
{
var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent;
return ParseUtil.GetBytes(SizeInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim());
}
private DateTime getReleaseDate(AngleSharp.Dom.IElement tr)
{
var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent;
return DateTime.Parse(ReleaseDateInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim());
}
private ICollection<IndexerCategory> MapCategories(AngleSharp.Html.Dom.IHtmlDocument dom, AngleSharp.Dom.IElement t, AngleSharp.Dom.IElement tr)
{
var rName = t.TextContent;
var rDesc = tr.QuerySelector("h3.tracker_info_bold").TextContent;
var type = dom.QuerySelector("div.releases-date:contains('Тип:')").TextContent;
// Check OVA first cause OVA looks like anime with OVA in release name or description
if (CategorieOVARegex.IsMatch(rName) || CategorieOVARegex.IsMatch(rDesc))
var queryCollection = new NameValueCollection
{
return _categories.MapTrackerCatDescToNewznab("OVA/ONA/Special");
}
{ "c", details.seriesId },
{ "s", details.season },
{ "e", string.IsNullOrEmpty(details.episode) ? "999" : details.episode } // 999 is a synonym for the whole serie
};
var url = _settings.BaseUrl + "v_search.php" + "?" + queryCollection.GetQueryString();
// Check movies then, cause some of releases could be movies dorama and should go to movies category
if (CategorieMovieRegex.IsMatch(rName) || CategorieMovieRegex.IsMatch(rDesc))
// Get redirection page with generated link on it. This link can't be constructed manually as it contains Hash field and hashing algo is unknown.
var req = new IndexerRequest(url, HttpAccept.Html);
var results = new IndexerResponse(req, HttpClient.ExecuteProxied(req.HttpRequest, Definition));
if (results.Content == null)
{
return _categories.MapTrackerCatDescToNewznab("Movies");
throw new Exception("Empty response from " + url);
}
// Check dorama. Most of doramas are flaged as doramas in type info, but type info could have a lot of types at same time (movie, etc)
if (CategorieDoramaRegex.IsMatch(rName) || CategorieDoramaRegex.IsMatch(type))
if (results.Content == "log in first")
{
return _categories.MapTrackerCatDescToNewznab("Dorama");
throw new Exception(results.Content);
}
return _categories.MapTrackerCatDescToNewznab("TV Anime");
var parser = new HtmlParser();
var document = parser.ParseDocument(results.Content);
var meta = document.QuerySelector("meta");
var metaContent = meta.GetAttribute("content");
// Follow redirection defined by async url.replace
var redirectionUrl = metaContent.Substring(metaContent.IndexOf("http"));
return FollowTrackerRedirection(redirectionUrl, details);
}
private IList<TorrentInfo> ParseRelease(IndexerResponse indexerResponse)
{
var torrentInfos = new List<TorrentInfo>();
var releases = new List<TorrentInfo>();
var parser = new HtmlParser();
var dom = parser.ParseDocument(indexerResponse.Content);
foreach (var t in dom.QuerySelectorAll("ul.media__tabs__nav > li > a"))
var playButton = dom.QuerySelector("div.external-btn");
if (playButton != null && !playButton.ClassList.Contains("inactive"))
{
var tr_id = t.Attributes["href"].Value;
var tr = dom.QuerySelector("div" + tr_id);
var seeders = int.Parse(tr.QuerySelector("div.circle_green_text_top").TextContent);
var url = indexerResponse.HttpRequest.Url.ToString();
// var details = new Uri(url);
var dateString = dom.QuerySelector("div.title-block > div.details-pane > div.left-box").TextContent;
var key = dateString.Contains("TBA") ? "ru: " : "eng: ";
dateString = TrimString(dateString, key, " г."); // '... Дата выхода eng: 09 марта 2012 г. ...' -> '09 марта 2012'
DateTime date;
var release = new TorrentInfo
{
Title = composeTitle(dom, t, tr),
InfoUrl = url,
DownloadVolumeFactor = 0,
UploadVolumeFactor = 1,
//dateString might be just a year, e.g. https://www.lostfilm.tv/series/Ghosted/season_1/episode_14/
if (dateString.Length == 4)
{
date = DateTime.ParseExact(dateString, "yyyy", CultureInfo.InvariantCulture).ToLocalTime();
}
else
{
date = DateTime.Parse(dateString, new CultureInfo("ru-RU")); // dd mmmm yyyy
}
Guid = url + tr_id,
Seeders = seeders,
Peers = seeders + int.Parse(tr.QuerySelector("div.circle_red_text_top").TextContent),
Grabs = int.Parse(tr.QuerySelector("div.circle_grey_text_top").TextContent),
Categories = MapCategories(dom, t, tr),
PublishDate = getReleaseDate(tr),
DownloadUrl = tr.QuerySelector("div.download_tracker > a.btn__green").Attributes["href"].Value,
MagnetUrl = tr.QuerySelector("div.download_tracker > a.btn__d-gray").Attributes["href"].Value,
Size = getReleaseSize(tr),
Resolution = getResolution(tr)
};
torrentInfos.Add(release);
var urlDetails = new TrackerUrlDetails(playButton);
var episodeReleases = FetchTrackerReleases(urlDetails);
foreach (var release in episodeReleases)
{
release.InfoUrl = indexerResponse.Request.Url.ToString();
release.PublishDate = date;
}
releases.AddRange(episodeReleases);
}
return torrentInfos;
return releases;
}
public IList<ReleaseInfo> ParseResponse(IndexerResponse indexerResponse)
{
var torrentInfos = new List<ReleaseInfo>();
var releases = new List<ReleaseInfo>();
var parser = new HtmlParser();
var dom = parser.ParseDocument(indexerResponse.Content);
var links = dom.QuerySelectorAll("a.ads-list__item__title");
foreach (var link in links)
var rows = dom.QuerySelectorAll("div.row");
foreach (var r in rows)
{
var url = link.GetAttribute("href");
// Some URLs in search are broken
if (url.StartsWith("//"))
{
url = "https:" + url;
}
var releaseRequest = new IndexerRequest(url, HttpAccept.Html);
var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.Execute(releaseRequest.HttpRequest));
var link = r.QuerySelector("a").GetAttribute("href");
var releaseRequest = new IndexerRequest(_settings.BaseUrl + link.TrimStart('/'), HttpAccept.Html);
var releaseResponse = new IndexerResponse(releaseRequest, HttpClient.ExecuteProxied(releaseRequest.HttpRequest, Definition));
// Throw common http errors here before we try to parse
if (releaseResponse.HttpResponse.HasHttpError)
@ -446,12 +493,36 @@ public IList<ReleaseInfo> ParseResponse(IndexerResponse indexerResponse)
}
}
torrentInfos.AddRange(ParseRelease(releaseResponse));
releases.AddRange(ParseRelease(releaseResponse));
}
return torrentInfos.ToArray();
return releases.ToArray();
}
public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; }
/// <summary>
/// Extracts the text located between the first occurrence of <paramref name="startChar"/>
/// and the last occurrence of <paramref name="endChar"/> in <paramref name="s"/>.
/// </summary>
/// <param name="s">Text to search; may be null.</param>
/// <param name="startChar">Opening delimiter (its first occurrence is used).</param>
/// <param name="endChar">Closing delimiter (its last occurrence is used).</param>
/// <returns>
/// The enclosed text (possibly empty), or <c>null</c> when <paramref name="s"/> is null,
/// a delimiter is missing, or the closing delimiter does not come after the opening one.
/// </returns>
private string TrimString(string s, char startChar, char endChar)
{
    if (s == null)
    {
        return null;
    }

    var start = s.IndexOf(startChar);
    var end = s.LastIndexOf(endChar);

    // "end <= start" also covers end == -1. Without this guard a closing delimiter that
    // precedes the opening one would pass a negative length to Substring and throw.
    if (start == -1 || end <= start)
    {
        return null;
    }

    return s.Substring(start + 1, end - start - 1);
}
/// <summary>
/// Extracts the text located between the first occurrence of <paramref name="startString"/>
/// and the last occurrence of <paramref name="endString"/> in <paramref name="s"/>.
/// </summary>
/// <param name="s">Text to search; may be null.</param>
/// <param name="startString">Opening delimiter (its first occurrence is used).</param>
/// <param name="endString">Closing delimiter (its last occurrence is used).</param>
/// <returns>
/// The enclosed text (possibly empty), or <c>null</c> when <paramref name="s"/> is null,
/// a delimiter is missing, or the closing delimiter starts before the opening one ends.
/// </returns>
private string TrimString(string s, string startString, string endString)
{
    if (s == null)
    {
        return null;
    }

    // Ordinal comparison: the delimiters are literal page markers, not linguistic text,
    // so matching must not vary with the current culture.
    var start = s.IndexOf(startString, StringComparison.Ordinal);
    if (start == -1)
    {
        return null;
    }

    var contentStart = start + startString.Length;
    var end = s.LastIndexOf(endString, StringComparison.Ordinal);

    // "end < contentStart" also covers end == -1. Without this guard reversed or
    // overlapping delimiters would pass a negative length to Substring and throw.
    if (end < contentStart)
    {
        return null;
    }

    return s.Substring(contentStart, end - contentStart);
}
// private DateTime DateFromEpisodeColumn(AngleSharp.Dom.IElement dateColumn)
// {
// var dateString = dateColumn.QuerySelector("span.small-text")?.TextContent;
// // 'Eng: 23.05.2017' -> '23.05.2017' OR '23.05.2017' -> '23.05.2017'
// dateString = string.IsNullOrEmpty(dateString) ? dateColumn.QuerySelector("span")?.TextContent : dateString.Substring(dateString.IndexOf(":") + 2);
// var date = DateTime.Parse(dateString, new CultureInfo("RU-ru")); // dd.mm.yyyy
// return date;
// }
}
}