Added adapter for http://fictionmania.tv/

2026-05-08 12:36:11 +02:00 · 2014-06-17 22:50:51 +02:00 · 2014-06-17 22:50:51 +02:00 · 3d1d3f4e26
commit 3d1d3f4e26
parent 7a5d77975a
4 changed files with 257 additions and 0 deletions
--- a/defaults.ini
+++ b/defaults.ini
@ -880,6 +880,45 @@ extraships:Harry Potter/Hermione Granger
 #username:YourName
 #password:yourpassword

+[fictionmania.tv]
+## website encoding(s) In theory, each website reports the character
+## encoding they use for each page.  In practice, some sites report it
+## incorrectly.  Each adapter has a default list, usually "utf8,
+## Windows-1252" or "Windows-1252, utf8", but this will let you
+## explicitly set the encoding and order if you need to.  The special
+## value 'auto' will call chardet and use the encoding it reports if
+## it has +90% confidence.  'auto' is not reliable.
+website_encodings:ISO-8859-1,auto
+
+## items to include in the log page Empty metadata entries, or those
+## that haven't changed since the last update, will *not* appear, even
+## if in the list.  You can include extra text or HTML that will be
+## included as-is in each log entry. Eg: logpage_entries: ...,<br />,
+## summary,<br />,...
+## numChapters is left out because every story is downloaded as a single
+## "chapter"; there is no reliable way to find a story's next chapter.
+logpage_entries: dateCreated,datePublished,dateUpdated,numWords,status,series,title,author,description,category,genre,rating,warnings
+
+## items to include in the title page
+## Empty metadata entries will *not* appear, even if in the list.
+## You can include extra text or HTML that will be included as-is in
+## the title page. Eg: titlepage_entries: ...,<br />,summary,<br />,...
+## All current formats already include title and author.
+## numChapters is left out because every story is downloaded as a single
+## "chapter"; there is no reliable way to find a story's next chapter.
+titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numWords,site,description
+
+## Extra metadata that this adapter knows about.  See [dramione.org]
+## for examples of how to use them.
+extra_valid_entries:fileName,fileSize,oldName,newName,keyWords,mainCharactersAge,readings
+
+## Turns the space characters of the story text into "&nbsp;" HTML entities
+## to forcefully preserve formatting that relies on spaces. Enabling this
+## increases the file size considerably and is probably not a good idea
+## unless you absolutely need the story's original formatting.
+## Specific to fictionmania.tv
+non_breaking_spaces:false
+
 [fictionpad.com]
 ## Some sites require login (or login for some rated stories) The
 ## program can prompt you, or you can save it in config.  In
--- a/fanficdownloader/adapters/init.py
+++ b/fanficdownloader/adapters/init.py
@ -129,6 +129,7 @@ import adapter_bloodshedversecom
 import adapter_nocturnallightnet
 import adapter_fanfichu
 import adapter_fanfictioncsodaidokhu
+import adapter_fictionmaniatv

 ## This bit of complexity allows adapters to be added by just adding
 ## importing.  It eliminates the long if/else clauses we used to need
--- a/fanficdownloader/adapters/adapter_fictionmaniatv.py
+++ b/fanficdownloader/adapters/adapter_fictionmaniatv.py
@ -0,0 +1,178 @@
+import re
+import urllib2
+import urlparse
+
+from .. import BeautifulSoup
+from ..BeautifulSoup import NavigableString
+
+from base_adapter import BaseSiteAdapter, makeDate
+from .. import exceptions
+
+
+def getClass():
+    """Return the adapter class implemented by this module."""
+    adapter_class = FictionManiaTVAdapter
+    return adapter_class
+
+
+def _get_query_data(url):
+    """Map each query parameter of *url* to the first of its values."""
+    query_string = urlparse.urlparse(url).query
+    first_values = {}
+    for name, values in urlparse.parse_qs(query_string).items():
+        first_values[name] = values[0]
+    return first_values
+
+# Yields every following sibling of the given tag, Tag _and_
+# NavigableString objects alike. BeautifulSoup's findNextSiblings() method
+# for some reason only returns either NavigableStrings _or_ Tag objects,
+# not both.
+def _yield_next_siblings(tag):
+    node = tag
+    while True:
+        node = node.nextSibling
+        # Stop at the first missing (or falsy) sibling.
+        if not node:
+            return
+        yield node
+
+
+class FictionManiaTVAdapter(BaseSiteAdapter):
+    """Adapter for stories hosted on fictionmania.tv.
+
+    Metadata is scraped from the story's "details" page and the text is
+    taken from the <pre> element of its "readtextstory" page. Every story
+    is treated as exactly one chapter.
+    """
+
+    SITE_ABBREVIATION = 'fmt'
+    SITE_DOMAIN = 'fictionmania.tv'
+
+    BASE_URL = 'http://' + SITE_DOMAIN + '/stories/'
+    READ_TEXT_STORY_URL_TEMPLATE = BASE_URL + 'readtextstory.html?storyID=%s'
+    DETAILS_URL_TEMPLATE = BASE_URL + 'details.html?storyID=%s'
+
+    # The four-digit-year format is tried first; the two-digit-year format
+    # is the fallback when parsing "Date Added".
+    DATETIME_FORMAT = '%m/%d/%Y'
+    ALTERNATIVE_DATETIME_FORMAT = '%m/%d/%y'
+
+    # Matches either a complete tag or a single space, so that spaces inside
+    # markup can be told apart from spaces in the story text.
+    _TAG_OR_SPACE = re.compile(r'<[^>]*>| ')
+
+    def __init__(self, config, url):
+        """Read the story ID from *url* and normalize the story URL."""
+        BaseSiteAdapter.__init__(self, config, url)
+
+        query_data = urlparse.parse_qs(self.parsedUrl.query)
+        story_id = query_data['storyID'][0]
+
+        self.story.setMetadata('storyId', story_id)
+        self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
+        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
+
+        # Always single chapters, probably should use the Anthology feature to
+        # merge chapters of a story
+        self.story.setMetadata('numChapters', 1)
+
+    def _customized_fetch_url(self, url, exception=None, parameters=None):
+        """Fetch *url* and return the page parsed by BeautifulSoup.
+
+        If *exception* is given, a urllib2.HTTPError raised while fetching
+        is replaced by ``exception(self.url)``. Otherwise whatever
+        self._fetchUrl raises propagates unchanged.
+        """
+        try:
+            data = self._fetchUrl(url, parameters)
+        except urllib2.HTTPError:
+            if not exception:
+                # No customization requested: re-raise the original error.
+                raise
+            raise exception(self.url)
+
+        return BeautifulSoup.BeautifulSoup(data)
+
+    @staticmethod
+    def _space_to_nbsp(match):
+        """Return '&nbsp;' for a matched space, any other match unchanged."""
+        token = match.group(0)
+        return '&nbsp;' if token == ' ' else token
+
+    @staticmethod
+    def getSiteDomain():
+        """Return the domain name this adapter handles."""
+        return FictionManiaTVAdapter.SITE_DOMAIN
+
+    @classmethod
+    def getSiteExampleURLs(cls):
+        """Return an example story URL for this site."""
+        return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234
+
+    def getSiteURLPattern(self):
+        """Return the regex source matching this site's story URLs."""
+        return re.escape(self.BASE_URL) + r'(readtextstory|details)\.html\?storyID=\d+$'
+
+    def extractChapterUrlsAndMetadata(self):
+        """Scrape the details page into story metadata and chapterUrls.
+
+        Raises exceptions.FailedToDownload if the details page contains no
+        table to read the metadata from.
+        """
+        url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
+        soup = self._customized_fetch_url(url)
+
+        table = soup.find('table')
+        if table is None:
+            raise exceptions.FailedToDownload(
+                'Error downloading story metadata: %s!  Missing required element!' % url)
+
+        keep_summary_html = self.getConfig('keep_summary_html')
+        for row in table('tr'):
+            cells = row('td')
+
+            # Only rows of the form "<b>Label:</b> | value" carry metadata;
+            # anything else is skipped instead of crashing the download.
+            if len(cells) < 2:
+                continue
+            label = cells[0].b
+            if label is None or not label.string:
+                continue
+
+            key = label.string.strip(':')
+            # None when the value cell holds more than a single text node.
+            value = cells[1].string
+
+            if key == 'Story Name-Title':
+                self.story.setMetadata('title', value)
+                self.chapterUrls.append((value, self.url))
+
+            elif key == 'File Name':
+                self.story.setMetadata('fileName', value)
+
+            elif key == 'File Size':
+                self.story.setMetadata('fileSize', value)
+
+            elif key == 'Author':
+                element = cells[1].a
+                if element is not None:
+                    self.story.setMetadata('author', element.string)
+                    query_data = _get_query_data(element['href'])
+                    self.story.setMetadata('authorId', query_data['word'])
+                    self.story.setMetadata('authorUrl', urlparse.urljoin(url, element['href']))
+
+            elif key == 'Date Added':
+                try:
+                    date = makeDate(value, self.DATETIME_FORMAT)
+                except ValueError:
+                    date = makeDate(value, self.ALTERNATIVE_DATETIME_FORMAT)
+                self.story.setMetadata('datePublished', date)
+
+            elif key == 'Old Name':
+                self.story.setMetadata('oldName', value)
+
+            elif key == 'New Name':
+                self.story.setMetadata('newName', value)
+
+            elif key == 'Other Key Names':
+                # An empty cell must not add an empty character name.
+                if value:
+                    for name in value.split(', '):
+                        self.story.addToList('characters', name)
+
+            # The site's rating is stored verbatim; no attempt is made to
+            # map it to another scale.
+            elif key == 'Rating':
+                self.story.setMetadata('rating', value)
+
+            elif key == 'Complete':
+                self.story.setMetadata('status', 'Complete' if value == 'Complete' else 'In-Progress')
+
+            elif key == 'Categories':
+                for element in cells[1]('a'):
+                    self.story.addToList('category', element.string)
+
+            elif key == 'Key Words':
+                for element in cells[1]('a'):
+                    self.story.addToList('keyWords', element.string)
+
+            elif key == 'Main Characters Age':
+                element = cells[1].a
+                if element is not None:
+                    self.story.setMetadata('mainCharactersAge', element.string)
+
+            elif key == 'Synopsis':
+                element = cells[1]
+
+                # Replace td with div to avoid possible strange formatting in
+                # the ebook later on
+                element.name = 'div'
+
+                if keep_summary_html:
+                    self.story.setMetadata('description', unicode(element))
+                else:
+                    self.story.setMetadata('description', ''.join(element(text=True)))
+
+            elif key == 'Reads':
+                self.story.setMetadata('readings', value)
+
+    def getChapterText(self, url):
+        """Return the story text found at *url* as an HTML fragment.
+
+        Raises exceptions.FailedToDownload if the page has no <pre> element.
+        """
+        soup = self._customized_fetch_url(url)
+        element = soup.find('pre')
+        if element is None:
+            raise exceptions.FailedToDownload(
+                'Error downloading Chapter: %s!  Missing required element!' % url)
+        element.name = 'div'
+
+        # The story's content is contained in a <pre> tag, probably taken 1:1
+        # from the source text file. A simple replacement of all newline
+        # characters with a break line tag should take care of formatting.
+
+        # While wrapping in paragraphs would be possible, it's too much work,
+        # I'd rather display the story 1:1 like it was found in the pre tag.
+        content = unicode(element)
+
+        if self.getConfig('non_breaking_spaces'):
+            # Only spaces outside of tags are replaced; a blanket replace
+            # would corrupt markup (attributes, and "<br />" itself).
+            content = self._TAG_OR_SPACE.sub(self._space_to_nbsp, content)
+
+        return content.replace('\n', '<br />')
--- a/plugin-defaults.ini
+++ b/plugin-defaults.ini
@ -874,6 +874,45 @@ extraships:Harry Potter/Hermione Granger
 #username:YourName
 #password:yourpassword

+[fictionmania.tv]
+## website encoding(s) In theory, each website reports the character
+## encoding they use for each page.  In practice, some sites report it
+## incorrectly.  Each adapter has a default list, usually "utf8,
+## Windows-1252" or "Windows-1252, utf8", but this will let you
+## explicitly set the encoding and order if you need to.  The special
+## value 'auto' will call chardet and use the encoding it reports if
+## it has +90% confidence.  'auto' is not reliable.
+website_encodings:ISO-8859-1,auto
+
+## items to include in the log page Empty metadata entries, or those
+## that haven't changed since the last update, will *not* appear, even
+## if in the list.  You can include extra text or HTML that will be
+## included as-is in each log entry. Eg: logpage_entries: ...,<br />,
+## summary,<br />,...
+## numChapters is left out because every story is downloaded as a single
+## "chapter"; there is no reliable way to find a story's next chapter.
+logpage_entries: dateCreated,datePublished,dateUpdated,numWords,status,series,title,author,description,category,genre,rating,warnings
+
+## items to include in the title page
+## Empty metadata entries will *not* appear, even if in the list.
+## You can include extra text or HTML that will be included as-is in
+## the title page. Eg: titlepage_entries: ...,<br />,summary,<br />,...
+## All current formats already include title and author.
+## numChapters is left out because every story is downloaded as a single
+## "chapter"; there is no reliable way to find a story's next chapter.
+titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numWords,site,description
+
+## Extra metadata that this adapter knows about.  See [dramione.org]
+## for examples of how to use them.
+extra_valid_entries:fileName,fileSize,oldName,newName,keyWords,mainCharactersAge,readings
+
+## Turns the space characters of the story text into "&nbsp;" HTML entities
+## to forcefully preserve formatting that relies on spaces. Enabling this
+## increases the file size considerably and is probably not a good idea
+## unless you absolutely need the story's original formatting.
+## Specific to fictionmania.tv
+non_breaking_spaces:false
+
 [fictionpad.com]
 ## Some sites require login (or login for some rated stories) The
 ## program can prompt you, or you can save it in config.  In