New site: fictionhunt.com

2026-05-08 12:36:11 +02:00 · 2016-09-21 19:07:50 -05:00 · 2016-09-21 19:07:50 -05:00 · a21b555255
commit a21b555255
parent f25b46d268
4 changed files with 179 additions and 0 deletions
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@ -1177,6 +1177,21 @@ romance_label: Romance
 ## this should go in your personal.ini, not defaults.ini.
 #is_adult:true

+[fictionhunt.com]
+## Archive only site for ffnet HP stories.
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+extra_valid_entries: origin,originUrl,originHTML,reviews
+originHTML_label:Original Story URL
+
+## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
+## make entryHTML.
+make_linkhtml_entries:origin
+
+add_to_extra_titlepage_entries:originHTML
+
 [fictionmania.tv]
 ## website encoding(s) In theory, each website reports the character
 ## encoding they use for each page.  In practice, some sites report it
--- a/fanficfare/adapters/init.py
+++ b/fanficfare/adapters/init.py
@ -143,6 +143,7 @@ import adapter_haremlucifaelcom
 import adapter_kiarepositorymujajinet
 import adapter_fanfictionlucifaelcom
 import adapter_adultfanfictionorg
+import adapter_fictionhuntcom

 ## This bit of complexity allows adapters to be added by just adding
 ## importing.  It eliminates the long if/else clauses we used to need
--- a/fanficfare/adapters/adapter_fictionhuntcom.py
+++ b/fanficfare/adapters/adapter_fictionhuntcom.py
@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2016 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+logger = logging.getLogger(__name__)
+import re
+import urllib2
+
+from .. import exceptions as exceptions
+from ..htmlcleanup import stripHTML
+
+from base_adapter import BaseSiteAdapter,  makeDate
+
+class FictionHuntComSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fichunt')
+
+        # get storyId from url--url validation guarantees second part is storyId
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+
+        # normalized story URL.
+        self._setURL("http://"+self.getSiteDomain()\
+                         +"/read/"+self.story.getMetadata('storyId')+"/1")
+
+        # The date format will vary from site to site.
+        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+        self.dateformat = "%d-%m-%Y"
+
+    @staticmethod
+    def getSiteDomain():
+        return 'fictionhunt.com'
+
+    @classmethod
+    def getSiteExampleURLs(cls):
+        return "http://fictionhunt.com/read/1234/1/ https://fictionhunt.com/read/1234/12/ http://fictionhunt.com/read/1234/1/Story_Title http://m.fanfiction.net/read/1234/1/"
+
+    def getSiteURLPattern(self):
+        return r"http://(www.)?fictionhunt.com/read/\d+(/\d+)?(/|/[^/]+)?/?$"
+
+    def use_pagecache(self):
+        '''
+        adapters that will work with the page cache need to implement
+        this and change it to True.
+        '''
+        return True
+
+    def doExtractChapterUrlsAndMetadata(self,get_cover=True):
+
+        # fetch the chapter.  From that we will get almost all the
+        # metadata and chapter list
+
+        url = self.url
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.meta)
+            else:
+                raise e
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = self.make_soup(data)
+
+        self.story.setMetadata('title',stripHTML(soup.find('div',{'class':'title'})).strip())
+
+        self.setDescription(url,'<i>(Story descriptions not available on fictionhunt.com)</i>')
+
+        # Find authorid and URL from... author url.
+        # fictionhunt doesn't have author pages, use ffnet original author link.
+        a = soup.find('a', href=re.compile(r"fanfiction.net/u/\d+"))
+        self.story.setMetadata('authorId',a['href'].split('/')[2])
+        self.story.setMetadata('authorUrl','https://www.fanfiction.net/u/'+self.story.getMetadata('authorId'))
+        self.story.setMetadata('author',a.string)
+
+        # Find original ffnet URL
+        a = soup.find('a', href=re.compile(r"fanfiction.net/s/\d+"))
+        self.story.setMetadata('origin',stripHTML(a))
+        self.story.setMetadata('originUrl',a['href'])
+
+        # Fleur D. & Harry P. & Hermione G. & Susan B. - Words: 42,848 - Rated: M - English - None - Chapters: 9 - Reviews: 248 - Updated: 21-09-2016 - Published: 16-05-2015 - by Elven Sorcerer (FFN)
+        # None - Words: 13,087 - Rated: M - English - Romance & Supernatural - Chapters: 3 - Reviews: 5 - Updated: 21-09-2016 - Published: 20-09-2016
+        # Harry P. & OC - Words: 10,910 - Rated: M - English - None - Chapters: 5 - Reviews: 6 - Updated: 21-09-2016 - Published: 11-09-2016
+        # Dudley D. & Harry P. & Nagini & Vernon D. - Words: 4,328 - Rated: K+ - English - None - Chapters: 2 - Updated: 21-09-2016 - Published: 20-09-2016 -
+        details = soup.find('div',{'class':'details'})
+
+        detail_re = \
+            r'(?P<characters>.+) - Words: (?P<numWords>[0-9,]+) - Rated: (?P<rating>[a-zA-Z\\+]+) - (?P<language>.+) - (?P<genre>.+)'+ \
+            r' - Chapters: (?P<numChapters>[0-9,]+)( - Reviews: (?P<reviews>[0-9,]+))? - Updated: (?P<dateUpdated>[0-9-]+)'+ \
+            r' - Published: (?P<datePublished>[0-9-]+)(?P<completed> - Complete)?'
+
+        details_dict = re.match(detail_re,stripHTML(details)).groupdict()
+        print details_dict
+
+        # lists
+        for meta in ('characters','genre'):
+            if details_dict[meta] != 'None':
+                self.story.extendList(meta,details_dict[meta].split(' & '))
+
+        # scalars
+        for meta in ('numWords','numChapters','rating','language','reviews'):
+            self.story.setMetadata(meta,details_dict[meta])
+
+        # dates
+        for meta in ('datePublished','dateUpdated'):
+            self.story.setMetadata(meta, makeDate(details_dict[meta], self.dateformat))
+
+        # status
+        if details_dict['completed']:
+            self.story.setMetadata('status', 'Completed')
+        else:
+            self.story.setMetadata('status', 'In-Progress')
+
+        # It's assumed that the number of chapters is correct.
+        # There's no complete list of chapters, so the only
+        # alternative is to get the num of chaps from the last
+        # indiated chapter list instead.
+        for i in range(1,1+int(self.story.getMetadata('numChapters'))):
+            self.chapterUrls.append((unicode(i),"http://"+self.getSiteDomain()\
+                                         +"/read/"+self.story.getMetadata('storyId')+"/%s"%i))
+
+    def getChapterText(self, url):
+        logger.debug('Getting chapter text from: %s' % url)
+        data = self._fetchUrl(url)
+
+        soup = self.make_soup(data)
+
+        div = soup.find('div', {'class' : 'text'})
+
+        return self.utf8FromSoup(url,div)
+
+def getClass():
+    return FictionHuntComSiteAdapter
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@ -1190,6 +1190,21 @@ romance_label: Romance
 ## this should go in your personal.ini, not defaults.ini.
 #is_adult:true

+[fictionhunt.com]
+## Archive only site for ffnet HP stories.
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+extra_valid_entries: origin,originUrl,originHTML,reviews
+originHTML_label:Original Story URL
+
+## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to
+## make entryHTML.
+make_linkhtml_entries:origin
+
+add_to_extra_titlepage_entries:originHTML
+
 [fictionmania.tv]
 ## website encoding(s) In theory, each website reports the character
 ## encoding they use for each page.  In practice, some sites report it