From 6207a2fdf776c7a40e49d7fa15b83673dd6f7d31 Mon Sep 17 00:00:00 2001 From: Alex Riina Date: Tue, 20 Apr 2021 18:43:49 -0400 Subject: [PATCH 1/2] add more story meta-data and fix more extra chapter headings --- fanficfare/adapters/adapter_novelfull.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fanficfare/adapters/adapter_novelfull.py b/fanficfare/adapters/adapter_novelfull.py index 33bf26c5..d54735ef 100644 --- a/fanficfare/adapters/adapter_novelfull.py +++ b/fanficfare/adapters/adapter_novelfull.py @@ -21,7 +21,17 @@ class NovelFullSiteAdapter(BaseSiteAdapter): return "novelfull.com" def getSiteURLPattern(self): - return r"https?://%s/(?P<story_id>.+).html?" % re.escape(self.getSiteDomain()) + return r"https?://%s/(index\.php/)?(?P<story_id>.+?)(/.*)?\.html?" % re.escape(self.getSiteDomain()) + + def __init__(self, configuration, url): + super(NovelFullSiteAdapter, self).__init__(configuration, url) + + story_id = re.match(self.getSiteURLPattern(), url).group('story_id') + self.story.setMetadata('storyId', story_id) + + self._setURL("http://%s/%s.html" % (self.getSiteDomain(), story_id)) + + self.story.setMetadata('siteabbrev', 'nvlfl') def extractChapterUrlsAndMetadata(self): data = self.get_request(self.url) @@ -75,7 +85,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter): content = soup.find(id="chapter-content") # Remove chapter header if present - chapter_header = content.find("p", text=re.compile(r"Chapter \d+:")) + chapter_header = content.find(["p", "h3"], text=re.compile(r"Chapter \d+:")) if chapter_header: chapter_header.decompose() From 536a759a7fab7a8abeb0b860b8b8894372b35a70 Mon Sep 17 00:00:00 2001 From: Alex Riina Date: Tue, 20 Apr 2021 18:47:23 -0400 Subject: [PATCH 2/2] default to https --- fanficfare/adapters/adapter_novelfull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficfare/adapters/adapter_novelfull.py b/fanficfare/adapters/adapter_novelfull.py index d54735ef..e009c5cb 100644 --- 
a/fanficfare/adapters/adapter_novelfull.py +++ b/fanficfare/adapters/adapter_novelfull.py @@ -29,7 +29,7 @@ class NovelFullSiteAdapter(BaseSiteAdapter): story_id = re.match(self.getSiteURLPattern(), url).group('story_id') self.story.setMetadata('storyId', story_id) - self._setURL("http://%s/%s.html" % (self.getSiteDomain(), story_id)) + self._setURL("https://%s/%s.html" % (self.getSiteDomain(), story_id)) self.story.setMetadata('siteabbrev', 'nvlfl')