mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
Added adapter for http://fictionmania.tv/
This commit is contained in:
parent
7a5d77975a
commit
3d1d3f4e26
4 changed files with 257 additions and 0 deletions
39
defaults.ini
39
defaults.ini
|
|
@ -880,6 +880,45 @@ extraships:Harry Potter/Hermione Granger
|
|||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[fictionmania.tv]
|
||||
## website encoding(s) In theory, each website reports the character
|
||||
## encoding they use for each page. In practice, some sites report it
|
||||
## incorrectly. Each adapter has a default list, usually "utf8,
|
||||
## Windows-1252" or "Windows-1252, utf8", but this will let you
|
||||
## explicitly set the encoding and order if you need to. The special
|
||||
## value 'auto' will call chardet and use the encoding it reports if
|
||||
## it has +90% confidence. 'auto' is not reliable.
|
||||
website_encodings:ISO-8859-1,auto
|
||||
|
||||
## items to include in the log page Empty metadata entries, or those
|
||||
## that haven't changed since the last update, will *not* appear, even
|
||||
## if in the list. You can include extra text or HTML that will be
|
||||
## included as-is in each log entry. Eg: logpage_entries: ...,<br />,
|
||||
## summary,<br />,...
|
||||
## Don't include numChapters since all stories are a single "chapter", there's
|
||||
## no way to reliably find the next chapter
|
||||
logpage_entries: dateCreated,datePublished,dateUpdated,numChapters,numWords,status,series,title,author,description,category,genre,rating,warnings
|
||||
|
||||
## items to include in the title page
|
||||
## Empty metadata entries will *not* appear, even if in the list.
|
||||
## You can include extra text or HTML that will be included as-is in
|
||||
## the title page. Eg: titlepage_entries: ...,<br />,summary,<br />,...
|
||||
## All current formats already include title and author.
|
||||
## Don't include numChapters since all stories are a single "chapter", there's
|
||||
## no way to reliably find the next chapter
|
||||
titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numWords,site,description
|
||||
|
||||
## Extra metadata that this adapter knows about. See [dramione.org]
|
||||
## for examples of how to use them.
|
||||
extra_valid_entries:fileName,fileSize,oldName,newName,keyWords,mainCharactersAge,readings
|
||||
|
||||
## Turns all space characters into "&nbsp;" HTML entities to forcefully preserve
|
||||
## formatting with spaces. Enabling this will blow up the filesize quite a bit
|
||||
## and is probably not a good idea, unless you absolutely need the story
|
||||
## formatting.
|
||||
## Specific to fictionmania.tv
|
||||
non_breaking_spaces:false
|
||||
|
||||
[fictionpad.com]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
|
|
|
|||
|
|
@ -129,6 +129,7 @@ import adapter_bloodshedversecom
|
|||
import adapter_nocturnallightnet
|
||||
import adapter_fanfichu
|
||||
import adapter_fanfictioncsodaidokhu
|
||||
import adapter_fictionmaniatv
|
||||
|
||||
## This bit of complexity allows adapters to be added by just adding
|
||||
## importing. It eliminates the long if/else clauses we used to need
|
||||
|
|
|
|||
178
fanficdownloader/adapters/adapter_fictionmaniatv.py
Normal file
178
fanficdownloader/adapters/adapter_fictionmaniatv.py
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
import re
|
||||
import urllib2
|
||||
import urlparse
|
||||
|
||||
from .. import BeautifulSoup
|
||||
from ..BeautifulSoup import NavigableString
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .. import exceptions
|
||||
|
||||
|
||||
def getClass():
    """Return the adapter class implemented by this module."""
    adapter_class = FictionManiaTVAdapter
    return adapter_class
|
||||
|
||||
|
||||
def _get_query_data(url):
|
||||
components = urlparse.urlparse(url)
|
||||
query_data = urlparse.parse_qs(components.query)
|
||||
return dict((key, data[0]) for key, data in query_data.items())
|
||||
|
||||
# yields Tag _and_ NavigableString siblings from the given tag. The
|
||||
# BeautifulSoup findNextSiblings() method for some reasons only returns either
|
||||
# NavigableStrings _or_ Tag objects, not both.
|
||||
def _yield_next_siblings(tag):
|
||||
sibling = tag.nextSibling
|
||||
while sibling:
|
||||
yield sibling
|
||||
sibling = sibling.nextSibling
|
||||
|
||||
|
||||
class FictionManiaTVAdapter(BaseSiteAdapter):
    """Site adapter for stories hosted on fictionmania.tv.

    Every story is treated as a single chapter. Metadata is scraped from
    the story's "details" page and the text from its "readtextstory" page.
    """

    SITE_ABBREVIATION = 'fmt'
    SITE_DOMAIN = 'fictionmania.tv'

    BASE_URL = 'http://' + SITE_DOMAIN + '/stories/'
    READ_TEXT_STORY_URL_TEMPLATE = BASE_URL + 'readtextstory.html?storyID=%s'
    DETAILS_URL_TEMPLATE = BASE_URL + 'details.html?storyID=%s'

    # "Date Added" is parsed with the four-digit-year format first and the
    # two-digit-year format as a fallback.
    DATETIME_FORMAT = '%m/%d/%Y'
    ALTERNATIVE_DATETIME_FORMAT = '%m/%d/%y'

    # Labels of the details table whose cell text is stored verbatim under
    # the given metadata key.
    _PLAIN_TEXT_FIELDS = {
        'File Name': 'fileName',
        'File Size': 'fileSize',
        'Old Name': 'oldName',
        'New Name': 'newName',
        'Rating': 'rating',
        'Reads': 'readings',
    }

    def __init__(self, config, url):
        """Initialise the adapter from a story URL.

        The ``storyID`` query parameter of ``url`` becomes the story id and
        the adapter URL is normalised to the "readtextstory" form.

        Raises KeyError if ``url`` carries no ``storyID`` query parameter.
        """
        BaseSiteAdapter.__init__(self, config, url)

        query_data = urlparse.parse_qs(self.parsedUrl.query)
        story_id = query_data['storyID'][0]

        self.story.setMetadata('storyId', story_id)
        self._setURL(self.READ_TEXT_STORY_URL_TEMPLATE % story_id)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)

        # Always single chapters, probably should use the Anthology feature
        # to merge chapters of a story
        self.story.setMetadata('numChapters', 1)

    def _customized_fetch_url(self, url, exception=None, parameters=None):
        """Fetch ``url`` and return the page parsed with BeautifulSoup.

        ``exception``, when given, is a callable (normally an exception
        class); an urllib2.HTTPError raised by the fetch is then replaced
        by ``exception(self.url)``. Without it, whatever self._fetchUrl
        raises propagates unchanged. ``parameters`` is passed straight
        through to self._fetchUrl.
        """
        if exception:
            try:
                data = self._fetchUrl(url, parameters)
            except urllib2.HTTPError:
                raise exception(self.url)
        # Just let self._fetchUrl throw the exception, don't catch and
        # customize it.
        else:
            data = self._fetchUrl(url, parameters)

        return BeautifulSoup.BeautifulSoup(data)

    @staticmethod
    def getSiteDomain():
        """Return the domain name this adapter handles."""
        return FictionManiaTVAdapter.SITE_DOMAIN

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL for this site."""
        return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234

    def getSiteURLPattern(self):
        """Return the regex matching story and details page URLs."""
        # Raw string so the regex escapes are not treated as (invalid)
        # string escapes; the resulting pattern text is unchanged.
        return re.escape(self.BASE_URL) + r'(readtextstory|details)\.html\?storyID=\d+$'

    def extractChapterUrlsAndMetadata(self):
        """Scrape the details page and fill in the story metadata.

        Walks the rows of the first table on the details page; each row is
        expected to hold a bold label in its first cell and the value in
        its second. Rows that do not have that shape are skipped. The
        single chapter entry (title, self.url) is registered when the
        title row is seen.
        """
        url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
        soup = self._customized_fetch_url(url)

        keep_summary_html = self.getConfig('keep_summary_html')
        for row in soup.find('table')('tr'):
            cells = row('td')

            # Skip rows that are not "label / value" pairs instead of
            # crashing on them.
            if len(cells) < 2:
                continue
            label = cells[0].b
            if label is None or label.string is None:
                continue
            key = label.string.strip(':')

            try:
                value = cells[1].string
            except AttributeError:
                value = None

            if key == 'Story Name-Title':
                self.story.setMetadata('title', value)
                self.chapterUrls.append((value, self.url))

            elif key in self._PLAIN_TEXT_FIELDS:
                self.story.setMetadata(self._PLAIN_TEXT_FIELDS[key], value)

            elif key == 'Author':
                element = cells[1].a
                if element is not None:
                    self.story.setMetadata('author', element.string)
                    query_data = _get_query_data(element['href'])
                    self.story.setMetadata('authorId', query_data['word'])
                    self.story.setMetadata('authorUrl', urlparse.urljoin(url, element['href']))

            elif key == 'Date Added':
                # No plain cell text means there is nothing to parse.
                if value:
                    try:
                        date = makeDate(value, self.DATETIME_FORMAT)
                    except ValueError:
                        date = makeDate(value, self.ALTERNATIVE_DATETIME_FORMAT)
                    self.story.setMetadata('datePublished', date)

            elif key == 'Other Key Names':
                # The cell text is None when the cell is empty or holds
                # nested markup; there are no names to split in that case.
                if value:
                    for name in value.split(', '):
                        self.story.addToList('characters', name)

            elif key == 'Complete':
                self.story.setMetadata('status', 'Complete' if value == 'Complete' else 'In-Progress')

            elif key == 'Categories':
                for element in cells[1]('a'):
                    self.story.addToList('category', element.string)

            elif key == 'Key Words':
                for element in cells[1]('a'):
                    self.story.addToList('keyWords', element.string)

            elif key == 'Main Characters Age':
                element = cells[1].a
                if element is not None:
                    self.story.setMetadata('mainCharactersAge', element.string)

            elif key == 'Synopsis':
                element = cells[1]

                # Replace td with div to avoid possible strange formatting
                # in the ebook later on
                element.name = 'div'

                if keep_summary_html:
                    self.story.setMetadata('description', unicode(element))
                else:
                    self.story.setMetadata('description', ''.join(element(text=True)))

    def getChapterText(self, url):
        """Fetch the story page at ``url`` and return its text as HTML.

        The story's content is contained in a <pre> tag, probably taken 1:1
        from the source text file. The tag is renamed to <div> and every
        newline becomes a break line tag. While wrapping in paragraphs
        would be possible, the story is deliberately displayed 1:1 like it
        was found in the pre tag.

        When the ``non_breaking_spaces`` option is enabled, spaces in the
        text are additionally turned into "&nbsp;" entities.
        """
        soup = self._customized_fetch_url(url)
        element = soup.find('pre')
        element.name = 'div'

        content = unicode(element)
        content = content.replace('\n', '<br />')

        if self.getConfig('non_breaking_spaces'):
            # Only touch spaces outside of tags: a space that is followed
            # by ">" with no "<" or ">" in between sits inside a tag (such
            # as the "<br />" inserted above) and must stay a real space.
            content = re.sub(r' (?![^<>]*>)', '&nbsp;', content)
        return content
|
||||
|
|
@ -874,6 +874,45 @@ extraships:Harry Potter/Hermione Granger
|
|||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[fictionmania.tv]
|
||||
## website encoding(s) In theory, each website reports the character
|
||||
## encoding they use for each page. In practice, some sites report it
|
||||
## incorrectly. Each adapter has a default list, usually "utf8,
|
||||
## Windows-1252" or "Windows-1252, utf8", but this will let you
|
||||
## explicitly set the encoding and order if you need to. The special
|
||||
## value 'auto' will call chardet and use the encoding it reports if
|
||||
## it has +90% confidence. 'auto' is not reliable.
|
||||
website_encodings:ISO-8859-1,auto
|
||||
|
||||
## items to include in the log page Empty metadata entries, or those
|
||||
## that haven't changed since the last update, will *not* appear, even
|
||||
## if in the list. You can include extra text or HTML that will be
|
||||
## included as-is in each log entry. Eg: logpage_entries: ...,<br />,
|
||||
## summary,<br />,...
|
||||
## Don't include numChapters since all stories are a single "chapter", there's
|
||||
## no way to reliably find the next chapter
|
||||
logpage_entries: dateCreated,datePublished,dateUpdated,numChapters,numWords,status,series,title,author,description,category,genre,rating,warnings
|
||||
|
||||
## items to include in the title page
|
||||
## Empty metadata entries will *not* appear, even if in the list.
|
||||
## You can include extra text or HTML that will be included as-is in
|
||||
## the title page. Eg: titlepage_entries: ...,<br />,summary,<br />,...
|
||||
## All current formats already include title and author.
|
||||
## Don't include numChapters since all stories are a single "chapter", there's
|
||||
## no way to reliably find the next chapter
|
||||
titlepage_entries: seriesHTML,category,genre,language,characters,ships,status,datePublished,dateUpdated,dateCreated,rating,warnings,numWords,site,description
|
||||
|
||||
## Extra metadata that this adapter knows about. See [dramione.org]
|
||||
## for examples of how to use them.
|
||||
extra_valid_entries:fileName,fileSize,oldName,newName,keyWords,mainCharactersAge,readings
|
||||
|
||||
## Turns all space characters into "&nbsp;" HTML entities to forcefully preserve
|
||||
## formatting with spaces. Enabling this will blow up the filesize quite a bit
|
||||
## and is probably not a good idea, unless you absolutely need the story
|
||||
## formatting.
|
||||
## Specific to fictionmania.tv
|
||||
non_breaking_spaces:false
|
||||
|
||||
[fictionpad.com]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
|
|
|
|||
Loading…
Reference in a new issue