Clean up on adapters

Besnef 2012-10-21 09:16:12 -04:00
commit 55fac6a7e3
95 changed files with 1216 additions and 575 deletions

View file

@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
version: 4-4-27
version: 4-4-29
runtime: python27
api_version: 1
threadsafe: true

View file

@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 6, 11)
version = (1, 6, 14)
minimum_calibre_version = (0, 8, 57)
#: This field defines the GUI plugin class that contains all the code

View file

@ -496,7 +496,10 @@ make_firstimage_cover:true
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = story.getSubjectTags(removeallentities=True)
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
if story.getMetadata("description"):
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
else:
book['comments']=''
book['series'] = story.getMetadata("series", removeallentities=True)
# adapter.opener is the element with a threadlock. But del
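Note on the hunk above: sanitize_comments_html (from calibre.library.comments) can fail on a None/empty description, which is what the new guard avoids. A minimal sketch of the same pattern, assuming the calibre import the plugin already uses:

from calibre.library.comments import sanitize_comments_html

def comments_for(story):
    # Only sanitize when the site actually supplied a description;
    # otherwise fall back to an empty string, as the hunk above does.
    desc = story.getMetadata("description")
    return sanitize_comments_html(desc) if desc else ''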
@ -538,7 +541,7 @@ make_firstimage_cover:true
print("from URL(%s)"%url)
# try to find by identifier url first.
searchstr = 'identifiers:"=url:%s"'%url.replace(":","|")
searchstr = 'identifiers:"=url:=%s"'%url.replace(":","|")
identicalbooks = db.search_getting_ids(searchstr, None)
if len(identicalbooks) < 1:
# find dups
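A sketch of how the corrected search string is built (my reading: colons must be escaped to '|' inside calibre identifier searches, and the extra '=' after 'url:' asks for an exact value match rather than a prefix match):

url = 'http://test1.com?sid=12345'  # hypothetical story URL
searchstr = 'identifiers:"=url:=%s"' % url.replace(':', '|')
print(searchstr)  # identifiers:"=url:=http|//test1.com?sid=12345"
# identicalbooks = db.search_getting_ids(searchstr, None)  # as in the hunk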
@ -634,9 +637,12 @@ make_firstimage_cover:true
if book['good']: # there shouldn't be any !'good' books at this point.
# if still 'good', make a temp file to write the output to.
tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'],
suffix='.'+options['fileform'],
dir=options['tdir'])
# For HTML format users, make the filename inside the zip something reasonable.
# For crazy long titles/authors, limit it to 100 chars.
# For weird/OS-unsafe characters, use file-safe characters only.
tmp = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
suffix='.'+options['fileform'],
dir=options['tdir'])
print("title:"+book['title'])
print("outfile:"+tmp.name)
book['outfile'] = tmp.name
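A standalone illustration (not FFDL's formatFileName) of the sanitize-then-truncate idea behind the new prefix:

import re

def safe_prefix(title, author, limit=100):
    # Approximates formatFileName("${title}-${author}-",
    # allowunsafefilename=False)[:100]: drop OS-unsafe characters,
    # then cap the length so zip entry names stay manageable.
    raw = "%s-%s-" % (title, author)
    return re.sub(r"[^\w\.\[\]\(\)&' -]", '', raw)[:limit]

print(safe_prefix('A "Very/Long: Title?' * 20, 'Some<Author>'))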

View file

@ -14,7 +14,6 @@ from StringIO import StringIO
from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
@ -109,9 +108,6 @@ def do_download_for_worker(book,options):
when run as a worker job
'''
try:
# import logging
# logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
book['comment'] = 'Download started...'
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
@ -123,7 +119,7 @@ def do_download_for_worker(book,options):
# images only for epub/html, even if the user mistakenly turned it
# on elsewhere.
if options['fileform'] != "epub":
if options['fileform'] not in ("epub","html"):
configuration.set("overrides","include_images","false")
adapter = adapters.getAdapter(configuration,book['url'])
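The same override appears again in the CLI below; here is a sketch with a plain ConfigParser standing in for FFDL's Configuration subclass:

import ConfigParser  # Python 2, matching this codebase

configuration = ConfigParser.SafeConfigParser()
configuration.add_section("overrides")
fileform = "mobi"  # hypothetical requested output format
# Images are only honored for epub and html output, so any other format
# forces include_images off regardless of the user's ini settings.
if fileform not in ("epub", "html"):
    configuration.set("overrides", "include_images", "false")
print(configuration.get("overrides", "include_images"))  # false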

View file

@ -161,8 +161,10 @@ extratags: FanFiction
## for regexp details.
## Make sure to keep at least one space at the start of each line and
## to escape % to %%, if used.
## Two or three part lines. Two part lines affect everything.
## Two, three or five part lines. Two part lines affect everything.
## Three part lines affect only the listed key(s).
## *Five* part lines affect only when the trailing conditional key=>regexp matches:
## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
#replace_metadata:
# genre,category=>Sci-Fi=>SF
# Puella Magi Madoka Magica.* => Madoka
@ -170,7 +172,9 @@ extratags: FanFiction
# Crossover: (.*)=>\1
# title=>(.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers
# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural
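A toy interpreter for the five-part form, to make the '&&' conditional concrete (illustrative only, not FFDL's actual parser):

import re

def apply_five_part(line, metadata):
    # metakey[,metakey]=>pattern=>replacement&&condkey=>condregexp
    # The replacement only runs when metadata[condkey] matches condregexp.
    main, cond = line.split('&&')
    keys, pattern, repl = main.split('=>')
    condkey, condre = cond.split('=>')
    if re.search(condre, metadata.get(condkey.strip(), '')):
        for key in keys.split(','):
            key = key.strip()
            metadata[key] = re.sub(pattern, repl, metadata.get(key, ''))
    return metadata

meta = {'characters': 'Sam W.', 'category': 'Supernatural'}
apply_five_part(r'characters=>Sam W\.=>Sam Winchester&&category=>Supernatural', meta)
print(meta['characters'])  # Sam Winchester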
## Some readers don't show horizontal rule (<hr />) tags correctly.
## This replaces them all with a centered '* * *'. (Note centering
## doesn't work on some devices either.)
@ -184,10 +188,15 @@ keep_summary_html:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
## back on with add_chapter_numbers:true. Only want them added back
## on for the Table of Contents (toc)? Use add_chapter_numbers:toconly.
## (toconly doesn't work on mobi output.) Don't like the way it
## strips numbers or adds them back? See chapter_title_strip_pattern
## and chapter_title_add_pattern.
strip_chapter_numbers:false
## add_chapter_numbers can be true, false or toconly
## (Note number is not added when there's only one chapter.)
add_chapter_numbers:false
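To make the strip/add interplay concrete, a short illustration (the strip regex here is an assumption approximating the defaults shown below):

import re

strip_pattern = r'^[0-9]+[\.: -]+'  # assumed default-ish strip pattern
for index, title in enumerate(['3. The Chase', 'The Chase', '03 - The Chase'], 1):
    bare = re.sub(strip_pattern, '', title)  # strip_chapter_numbers
    print('%d. %s' % (index, bare))          # add pattern '${index}. ${title}'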
## (Two versions of chapter_title_strip_pattern are shown below. You
@ -218,6 +227,22 @@ chapter_title_add_pattern:${index}. ${title}
## Each output format has a section that overrides [defaults]
[html]
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
## include_images is *only* available in epub and html output formats.
## include_images is *not* available in the web service in any format.
#include_images:false
## Note that it's *highly* recommended to use zipfile output or
## story-unique destination directories to avoid overwriting images.
#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
#zip_output: false
## This switch prevents FFDL from doing any processing on the images.
## Usually they would be converted to jpg, resized and optionally made
## grayscale.
no_image_processing: true
## output background color--only used by html and epub (and ignored in
## epub by many readers). Included below in output_css--will be
## ignored if not in output_css.
@ -261,13 +286,17 @@ zip_output: false
## mobi generated from epub by calibre will have a TOC at the end.
include_tocpage: false
## include a Update Log page before the story text. If included, the
## log will be updated each time the epub is an all the metadata
## include an Update Log page before the story text. If 'true', the
## log will be updated each time the epub is, and all the metadata
## fields that have changed since the last update (typically
## dateUpdated,numChapters,numWords at a minimum) will be shown.
## Great for tracking when chapters came out and when the description,
## etc changed.
include_logpage: false
## If set to 'smart', logpage will only be included if the story is
## status:In-Progress or already had a logpage. That way you don't
## end up with Completed stories that have just one logpage entry.
#include_logpage: smart
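A sketch of the 'smart' decision described above (not FFDL's code):

def want_logpage(setting, status, had_logpage):
    # 'smart': keep a logpage for In-Progress stories, or for stories
    # that already have one, so Completed stories don't end up with a
    # single-entry logpage.
    if setting == 'smart':
        return status == 'In-Progress' or had_logpage
    return setting == 'true'

print(want_logpage('smart', 'Completed', False))    # False
print(want_logpage('smart', 'In-Progress', False))  # True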
## items to include in the log page. Empty metadata entries, or those
## that haven't changed since the last update, will *not* appear, even
@ -320,6 +349,8 @@ output_css:
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
## include_images is *only* available in epub and html output formats.
## include_images is *not* available in the web service in any format.
#include_images:false
## If set, the first image found will be made the cover image. If
@ -374,31 +405,31 @@ nook_img_fix:true
## URLs like: http://test1.com?sid=12345
[test1.com]
extratags: FanFiction,Testing
extracategories:Fafner
extragenres:Romance,Fluff
extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
extraships:Smythe-Smythe/Mokona
extrawarnings:Extreme Bogosity
# extracategories:Fafner
# extragenres:Romance,Fluff
# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
# extraships:Smythe-Smythe/Mokona
# extrawarnings:Extreme Bogosity
extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
include_in_compositeJ:dateCreated
include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
# include_in_compositeJ:dateCreated
# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
extra_subject_tags: metaA,metaB,metaC
# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
# extra_subject_tags: metaA,metaB,metaC
replace_metadata:
compositeL=>Val=>VALUE
series,extratags=>Test=>Plan
Puella Magi Madoka Magica.* => Madoka
Comedy=>Humor
Crossover: (.*)=>\1
(.*)Great(.*)=>\1Moderate\2
.*-Centered=>
characters=>Harry P\.=>Harry Potter
# replace_metadata:
# compositeL=>Val=>VALUE
# series,extratags=>Test=>Plan
# Puella Magi Madoka Magica.* => Madoka
# Comedy=>Humor
# Crossover: (.*)=>\1
# (.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
# characters=>Harry P\.=>Harry Potter
## If necessary, you can define [<site>:<format>] sections to
@ -556,9 +587,8 @@ cliches_label:Character Cliches
## specific entries to titlepage/logpage without having to copy the
## entire titlepage_entries line. (But if you want them higher than
## the end, you will need to copy titlepage_entries.)
#extra_titlepage_entries: themes,hermiones,dracos,timeline,cliches
## adds to include_subject_tags instead of replacing it.
#extra_titlepage_entries: themes,timeline,cliches
#extra_logpage_entries: themes,timeline,cliches
#extra_subject_tags: themes,timeline,cliches
[erosnsappho.sycophanthex.com]
@ -1019,6 +1049,15 @@ extracategories:Harry Potter
## Site dedicated to these categories/characters/ships
extracategories:Prison Break
[www.qaf-fic.com]
## Site dedicated to these categories/characters/ships
extracategories:Queer as Folk
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In the command-line version,
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
[www.scarvesandcoffee.net]
## Site dedicated to these categories/characters/ships
extracategories:Glee

View file

@ -15,8 +15,6 @@
# limitations under the License.
#
import logging
import sys, os
from os.path import normpath, expanduser, isfile, join
from StringIO import StringIO
@ -26,6 +24,14 @@ import string
import ConfigParser
from subprocess import call
import logging
if sys.version_info >= (2, 7):
# suppresses the default logger. Logging is set up in fanficdownloader/__init__.py so it works in calibre, too.
rootlogger = logging.getLogger()
loghandler=logging.NullHandler()
loghandler.setFormatter(logging.Formatter("(=====)%(levelname)s:%(message)s"))
rootlogger.addHandler(loghandler)
from fanficdownloader import adapters,writers,exceptions
from fanficdownloader.configurable import Configuration
from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data
@ -79,11 +85,9 @@ def main():
(options, args) = parser.parse_args()
if options.debug:
logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
else:
logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
if not options.debug:
logger = logging.getLogger("fanficdownloader")
logger.setLevel(logging.INFO)
if len(args) != 1:
parser.error("incorrect number of arguments")
@ -120,8 +124,6 @@ def main():
logging.debug('reading %s config file(s), if present'%conflist)
configuration.read(conflist)
print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))
try:
configuration.add_section("overrides")
except ConfigParser.DuplicateSectionError:
@ -138,7 +140,7 @@ def main():
# images only for epub/html, even if the user mistakenly turned it
# on elsewhere.
if options.format != "epub":
if options.format not in ("epub","html"):
configuration.set("overrides","include_images","false")
if options.options:

View file

@ -1 +1,16 @@
# -*- coding: utf-8 -*-
try:
# just a way to switch between web service and CLI/PI
import google.appengine.api
except:
import sys
if sys.version_info >= (2, 7):
import logging
logger = logging.getLogger(__name__)
loghandler=logging.StreamHandler()
loghandler.setFormatter(logging.Formatter("FFDL:%(levelname)s:%(filename)s(%(lineno)d):%(message)s"))
logger.addHandler(loghandler)
loghandler.setLevel(logging.DEBUG)
logger.setLevel(logging.DEBUG)
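Downstream modules only need the stock getLogger pattern for their records to reach this handler; a sketch of the propagation (not new FFDL code):

import logging

# A child logger anywhere under the package propagates its records up to
# the 'fanficdownloader' logger configured above, so they hit the
# StreamHandler in CLI/plugin runs and stay quiet on App Engine.
logger = logging.getLogger('fanficdownloader.adapters.some_adapter')
logger.debug('reaches the FFDL handler via propagation')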

View file

@ -20,6 +20,8 @@ from os.path import dirname, basename, normpath
import logging
import urlparse as up
logger = logging.getLogger(__name__)
from .. import exceptions as exceptions
## must import each adapter here.
@ -103,6 +105,7 @@ import adapter_bloodtiesfancom
import adapter_indeathnet
import adapter_dwiggiecom
import adapter_jlaunlimitedcom
import adapter_qafficcom
## This bit of complexity allows adapters to be added by just adding
@ -124,9 +127,9 @@ for x in imports():
def getAdapter(config,url):
logging.debug("trying url:"+url)
logger.debug("trying url:"+url)
(cls,fixedurl) = getClassFor(url)
logging.debug("fixedurl:"+fixedurl)
logger.debug("fixedurl:"+fixedurl)
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
return adapter
@ -164,11 +167,11 @@ def getClassFor(url):
cls = getClassFromList(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
logging.debug("trying site:without www: "+domain)
logger.debug("trying site:without www: "+domain)
cls = getClassFromList(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
logger.debug("trying site:www."+domain)
cls = getClassFromList("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")
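A sketch of the lookup order getClassFor tries above, with the www. normalization pulled out on its own (an illustrative helper, not part of the module):

import urlparse as up  # Python 2, as in this module

def candidate_domains(url):
    # Try the domain as given, then toggle the 'www.' prefix, matching
    # the fallbacks in getClassFor above.
    domain = up.urlparse(url).netloc.lower()
    tries = [domain]
    if domain.startswith('www.'):
        tries.append(domain.replace('www.', '', 1))
    else:
        tries.append('www.' + domain)
    return tries

print(candidate_domains('http://www.dokuga.com/fanfiction/story/1234/1'))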

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -41,7 +42,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -65,7 +66,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -204,7 +205,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect
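This same mechanical swap, a module-level logger = logging.getLogger(__name__) replacing root logging.* calls, repeats through every adapter file below. The point of the change in two lines (sketch):

import logging

logging.debug("root logger: shares handlers with calibre and any host app")
logger = logging.getLogger(__name__)  # the pattern each adapter now uses
logger.debug("scoped: FFDL can tune just its own hierarchy's verbosity")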

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -30,6 +31,8 @@ def getClass():
return ArchiveOfOurOwnOrgAdapter
logger = logging.getLogger(__name__)
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@ -48,13 +51,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# get storyId from url--url validation guarantees query correct
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
else:
@ -104,14 +107,14 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('" /></div>')[0]
loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions'
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
params['user_session[login]']))
d = self._postUrl(loginUrl, params)
#logging.info(d)
#logger.info(d)
if "Successfully logged in" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['user_session[login]']))
raise exceptions.FailedToLogin(url,params['user_session[login]'])
return False
@ -128,8 +131,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
metaurl = self.url+addurl
url = self.url+'/navigate'+addurl
logging.info("url: "+url)
logging.info("metaurl: "+metaurl)
logger.info("url: "+url)
logger.info("metaurl: "+metaurl)
try:
data = self._fetchUrl(url)
@ -164,7 +167,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+"))
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
self.story.setMetadata('author','Anonymous')
self.story.setMetadata('authorUrl',self.url)
self.story.setMetadata('authorUrl','http://archiveofourown.org/')
self.story.setMetadata('authorId','0')
else:
for a in alist:
self.story.addToList('authorId',a['href'].split('/')[2])
@ -174,7 +178,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))
self.story.setMetadata('numChapters',len(chapters))
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
for x in range(0,len(chapters)):
# just in case there's tags, like <i> in chapter titles.
chapter=chapters[x]
@ -291,7 +295,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
chapter=bs.BeautifulSoup('<div class="story"></div>')
data = self._fetchUrl(url)
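A sketch of the Anonymous fallback in the hunk above (not the adapter itself): AO3 allows works with no /users/... pseud link at all, so authorUrl now points at the site root instead of back at the story:

def author_fields(pseud_links):
    if not pseud_links:
        # the fallback the hunk above introduces
        return {'author': 'Anonymous',
                'authorUrl': 'http://archiveofourown.org/',
                'authorId': '0'}
    href = pseud_links[0]  # e.g. '/users/someuser/pseuds/someuser'
    return {'author': href.split('/')[4],
            'authorUrl': 'http://archiveofourown.org' + href,
            'authorId': href.split('/')[2]}

print(author_fields([]))
print(author_fields(['/users/someuser/pseuds/someuser']))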

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
@ -78,7 +79,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -151,7 +152,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
rating.find('br').replaceWith('split')
rating=rating.text.replace("This story is rated",'').split('split')[0]
self.story.setMetadata('rating',rating)
logging.debug(self.story.getMetadata('rating'))
logger.debug(self.story.getMetadata('rating'))
warnings=box.find('ol')
if warnings != None:
@ -177,7 +178,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -94,13 +95,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Logout" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -112,7 +113,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -237,7 +238,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -69,7 +70,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -115,13 +116,13 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -148,7 +149,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -182,7 +183,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -320,7 +321,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -71,7 +72,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -117,13 +118,13 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/fanfic/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -145,7 +146,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -293,7 +294,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -86,7 +87,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -221,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/story.php?story='+self.story.getMetadata('storyId'))
@ -95,7 +96,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
e = self._fetchUrl(url)
if "Welcome back," not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['name']))
raise exceptions.FailedToLogin(url,params['name'])
return False
@ -112,7 +113,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -222,7 +223,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -98,13 +99,13 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/elysian/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "User Account Page" not in d : #Member Account
logging.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
logger.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
else:
@ -115,7 +116,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -261,7 +262,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -86,7 +87,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -106,7 +107,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -227,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DokugaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# www.dokuga.com has two 'sections', shown in URL as
# 'fanfiction' and 'spark' that change how things should be
@ -87,7 +88,7 @@ class DokugaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -224,7 +225,7 @@ class DokugaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -282,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class DramioneOrgAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -283,7 +284,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -86,7 +87,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -106,7 +107,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -239,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from urllib import unquote_plus
@ -75,12 +76,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url = self.origurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
data = self._fetchUrl(url)
#print("\n===================\n%s\n===================\n"%data)
#logger.debug("\n===================\n%s\n===================\n"%data)
soup = bs.BeautifulSoup(data)
except urllib2.HTTPError, e:
if e.code == 404:
@ -108,11 +109,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
tryurl = "http://%s/s/%s/%d/"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount+1)
print('=Trying newer chapter: %s' % tryurl)
logger.debug('=Trying newer chapter: %s' % tryurl)
newdata = self._fetchUrl(tryurl)
if "not found. Please check to see you are not using an outdated url." \
not in newdata:
print('=======Found newer chapter: %s' % tryurl)
logger.debug('=======Found newer chapter: %s' % tryurl)
soup = bs.BeautifulSoup(newdata)
except:
pass
@ -160,7 +161,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
metatext = stripHTML(gui_table1i.find('div', {'style':'color:gray;'})).replace('Hurt/Comfort','Hurt-Comfort')
metalist = metatext.split(" - ")
#print("metatext:(%s)"%metalist)
#logger.debug("metatext:(%s)"%metalist)
# Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
@ -176,9 +177,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
goodgenres=True
for g in genrelist:
print("g:(%s)"%g)
#logger.debug("g:(%s)"%g)
if g.strip() not in ffnetgenres:
print("g not in ffnetgenres")
logger.info("g not in ffnetgenres")
goodgenres=False
if goodgenres:
self.story.extendList('genre',genrelist)
@ -240,7 +241,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
return
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffnet (and, I assume, fpcom) tends to fail
## more if hit too fast. This is in
## addition to whatever the
@ -265,7 +266,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
div = soup.find('div', {'id' : 'storytextp'})
if None == div:
logging.debug('div id=storytextp not found. data:%s'%data)
logger.debug('div id=storytextp not found. data:%s'%data)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,div)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -48,7 +49,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
@ -94,12 +95,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
params['submit'] = 'Login...'
loginUrl = 'https://ssl.fanfiktion.de/'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['nickname']))
d = self._postUrl(loginUrl,params)
if "Login erfolgreich" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['nickname']))
raise exceptions.FailedToLogin(url,params['nickname'])
return False
@ -110,7 +111,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -183,7 +184,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffde has "floodlock" protection
soup = bs.BeautifulSoup(self._fetchUrl(url),

View file

@ -18,6 +18,7 @@
import time
import datetime
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from .. import translit
@ -33,6 +34,8 @@ def getClass():
return FicBookNetAdapter
logger = logging.getLogger(__name__)
class FicBookNetAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@ -49,7 +52,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId'))
@ -75,7 +78,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
url=self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
except urllib2.HTTPError, e:
@ -95,14 +98,14 @@ class FicBookNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('h1')
self.story.setMetadata('title',stripHTML(a))
logging.debug("Title: (%s)"%self.story.getMetadata('title'))
logger.debug("Title: (%s)"%self.story.getMetadata('title'))
# Find authorid and URL from... author url.
a = table.find('a')
self.story.setMetadata('authorId',a.text) # Author's name is unique
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.text)
logging.debug("Author: (%s)"%self.story.getMetadata('author'))
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
# Find the chapters:
chapters = soup.find('div', {'class' : 'part_list'})
@ -123,7 +126,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span')))
update=pubdate
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
if not ',' in pubdate:
pubdate=datetime.date.today().strftime(self.dateformat)
@ -207,7 +210,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -44,7 +45,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
if m:
self.story.setMetadata('authorId',m.group('auth'))
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL(url)
else:
@ -68,7 +69,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
if self.is_adult or self.getConfig("is_adult"):
params={'iamold':'Yes',
'action':'ageanswer'}
logging.info("Attempting to get cookie for %s" % url)
logger.info("Attempting to get cookie for %s" % url)
## posting on list doesn't work, but doesn't hurt, either.
data = self._postUrl(url,params)
else:
@ -79,7 +80,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
## could be either chapter list page or one-shot text page.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._postFetchWithIAmOld(url)
@ -107,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
self._setURL(storya['href'])
url=self.url
logging.debug("Normalizing to URL: "+url)
logger.debug("Normalizing to URL: "+url)
## title's right there...
self.story.setMetadata('title',storya.string)
data = self._fetchUrl(url)
@ -193,7 +194,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# find <!-- headerend --> & <!-- footerstart --> and

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time
@ -61,12 +62,12 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
params['password'] = self.getConfig("password")
loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['username']))
d = self._postUrl(loginUrl,params)
if "Login attempt failed..." in d:
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['username']))
raise exceptions.FailedToLogin(url,params['username'])
return False
@ -79,7 +80,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
@ -96,7 +97,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# normalize story URL on chapter list.
self.story.setMetadata('storyId',storya['href'].split('/',)[2])
url = "http://"+self.getSiteDomain()+storya['href']
logging.debug("Normalizing to URL: "+url)
logger.debug("Normalizing to URL: "+url)
self._setURL(url)
try:
soup = bs.BeautifulSoup(self._fetchUrl(url))
@ -201,7 +202,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import cookielib as cl
@ -175,7 +176,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})
if soup == None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -49,7 +50,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0])
if 'storyInfo' in self.story.getMetadata('storyId'):
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/s/storyInfo.php?id='+self.story.getMetadata('storyId'))
@ -95,13 +96,13 @@ class FineStoriesComAdapter(BaseSiteAdapter):
params['submit'] = 'Login'
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['theusername']))
d = self._fetchUrl(loginUrl, params)
if "My Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['theusername']))
raise exceptions.FailedToLogin(url,params['theusername'])
return False
@ -114,7 +115,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -232,7 +233,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,8 +48,8 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1])
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.section=self.parsedUrl.path.split('/',)[1]
# normalized story URL.
if "malfoymanor" in self.parsedUrl.netloc:
@ -100,17 +101,17 @@ class GrangerEnchantedCom(BaseSiteAdapter):
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
if "enchant" in self.story.getMetadata('section'):
if "enchant" in self.section:
loginUrl = 'http://grangerenchanted.com/enchant/user.php?action=login'
else:
loginUrl = 'http://malfoymanor.grangerenchanted.com/themanor/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -132,7 +133,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -157,7 +158,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -191,7 +192,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+chapter['href']+addurl))
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl))
self.story.setMetadata('numChapters',len(self.chapterUrls))
@ -262,7 +263,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+a['href']
series_url = 'http://'+self.host+'/'+self.section+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
@ -284,10 +285,10 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
div = soup.find('div', {'id' : 'story1'})

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -41,7 +42,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only psid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))
@ -72,7 +73,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -181,7 +182,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
## most adapters use BeautifulStoneSoup here, but non-Stone
## allows nested div tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -215,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /eff part. Replace all to remove it usually.
@ -79,7 +80,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -180,21 +181,22 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
value = td.nextSibling.string
#print("\nlabel:%s\nvalue:%s\n"%(label,value))
if 'Category' in label:
if 'Category' in label and value:
cats = td.parent.findAll('a',href=re.compile(r'categories.php'))
catstext = [cat.string for cat in cats]
for cat in catstext:
self.story.addToList('category',cat.string)
if 'Characters' in label:
if 'Characters' in label and value: # this site can have Character label with no
# values, apparently. Others as a precaution.
for char in value.split(','):
self.story.addToList('characters',char.strip())
if 'Genre' in label:
if 'Genre' in label and value:
for genre in value.split(','):
self.story.addToList('genre',genre.strip())
if 'Warnings' in label:
if 'Warnings' in label and value:
for warning in value.split(','):
if warning.strip() != 'none':
self.story.addToList('warnings',warning.strip())
@ -208,7 +210,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# There's no good wrapper around the chapter text. :-/
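On the `and value` guards added above: td.nextSibling.string is None when a label has no value, and calling split on None raises AttributeError, so the guard skips empty labels instead of crashing. Condensed, with names as in the adapter:

    label = td.string
    value = td.nextSibling.string            # may be None on this site
    if 'Characters' in label and value:      # guard against the empty case
        for char in value.split(','):
            self.story.addToList('characters', char.strip())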

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -202,7 +203,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class IkEternalNetAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -158,7 +159,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -267,7 +268,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
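The login hunks above follow the shared eFiction shape: POST the penname/password form, then sniff the response for the 'Member Account' marker that only logged-in pages carry. Condensed from the lines above (in the base adapter, _fetchUrl issues a POST when parameters are supplied):

    params = {'penname': self.username,
              'password': self.password,
              'submit': 'Submit'}
    loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
    d = self._fetchUrl(loginUrl, params)   # POSTs the login form
    if "Member Account" not in d:          # marker missing => login failed
        logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname']))
        raise exceptions.FailedToLogin(url, params['penname'])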

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -99,7 +100,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -132,7 +133,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -294,7 +295,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,
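For reference, the fetch boilerplate these hunks keep touching, condensed: '&index=1' forces the full chapter index even for one-chapter stories, and a 404 becomes StoryDoesNotExist rather than a raw HTTPError:

    url = self.url + '&index=1' + addurl
    try:
        data = self._fetchUrl(url)
    except urllib2.HTTPError, e:           # Python 2 syntax, as in the source
        if e.code == 404:
            raise exceptions.StoryDoesNotExist(self.url)
        else:
            raise e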

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -84,7 +85,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -104,7 +105,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -234,7 +235,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -86,7 +87,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -221,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -42,7 +43,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId'))
else:
@ -66,7 +67,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -188,7 +189,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data=self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,
@ -214,7 +215,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
return self.utf8FromSoup(url,anchor)
else:
logging.debug('Using kludgey text find for older mediaminer story.')
logger.debug('Using kludgey text find for older mediaminer story.')
## Some older mediaminer stories are unparsable with BeautifulSoup.
## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first.
## Story stuff falls between:
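The hunk stops just before the markers the 'kludgey text find' slices between, so those stay elided here; the shape of the cheat is a plain string slice of the raw HTML before souping it. The two marker strings below are placeholders, not MediaMiner's real ones:

    data = self._fetchUrl(url)
    start = data.find('<!-- story start -->')   # placeholder marker
    end   = data.find('<!-- story end -->')     # placeholder marker
    soup = bs.BeautifulSoup(data[start:end])    # parse only the story slice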

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -277,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -95,7 +96,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -128,7 +129,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -269,7 +270,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -96,13 +97,13 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login&sid='+self.story.getMetadata('storyId')
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -125,7 +126,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -164,7 +165,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -315,7 +316,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
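The 'URL 2nd try' lines above belong to a retry that several of these adapters share: if the default warning value still lands on the age gate, scrape the real warning parameter out of the page's 'click here to continue' link, un-escape its HTML-encoded ampersands, and refetch. A condensed sketch of that flow:

    m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\S+)'", data)
    if m is not None:
        if self.is_adult or self.getConfig("is_adult"):
            addurl = m.group(1).replace("&amp;", "&")   # fix entity-escaped '&'
            data = self._fetchUrl(self.url + '&index=1' + addurl)
        else:
            raise exceptions.AdultCheckRequired(self.url)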

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -177,7 +178,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -184,7 +185,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL("http://"+self.getSiteDomain()\
@ -78,7 +79,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -187,7 +188,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -99,7 +100,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -132,7 +133,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -273,7 +274,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -74,7 +75,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -197,7 +198,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -94,13 +95,13 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Logout" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -112,7 +113,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -245,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = data.replace('<div align="left"', '<div align="left">')

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -97,13 +98,13 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -125,7 +126,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -253,7 +254,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2, urllib, cookielib
@ -47,7 +48,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/')
@ -90,12 +91,12 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
params['login'] = 'Login'
loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['txtusername']))
d = self._fetchUrl(loginUrl, params)
if 'Please login to continue.' in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['txtusername']))
raise exceptions.FailedToLogin(url,params['txtusername'])
return False
@ -108,7 +109,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -208,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,8 +48,8 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logging.info(self.parsedUrl.netloc)
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.info(self.parsedUrl.netloc)
# normalized story URL.
if "explicit" in self.parsedUrl.netloc:
self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -91,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -112,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -234,7 +235,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import cookielib as cl
@ -54,7 +55,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/story/'+self.story.getMetadata('storyId'))
@ -81,7 +82,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# portkey screws around with using a different URL to set the
# cookie and it's a pain. So... cheat!
@ -247,7 +248,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
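The cookie 'cheat' the comment above alludes to is the stock urllib2/cookielib pairing (this adapter imports cookielib as cl). A hedged sketch only; cookie_url stands in for whatever address Portkey actually uses to set the cookie:

    import urllib2
    import cookielib as cl

    jar = cl.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    opener.open(cookie_url)               # first request only collects the cookie
    data = opener.open(self.url).read()   # story page now loads past the gate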

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -40,7 +41,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -63,7 +64,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -191,7 +192,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -89,7 +90,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -109,7 +110,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -237,7 +238,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -202,7 +203,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -0,0 +1,262 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
return QafFicComAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class QafFicComAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.decode = ["Windows-1252",
"utf8"] # 1252 is a superset of iso-8859-1.
# Most sites that claim to be
# iso-8859-1 (and some that claim to be
# utf8) are really windows-1252.
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','atp')
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%m/%d/%y"
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'www.qaf-fic.com'
def getSiteExampleURLs(self):
return "http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=1234"
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$"
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
if self.is_adult or self.getConfig("is_adult"):
# Weirdly, different sites use different warning numbers.
# If the title search below fails, there's a good chance
# you need a different number. print data at that point
# and see what the 'click here to continue' url says.
addurl = "&warning=NC-17"
else:
addurl=""
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\s+)'",data)
if m != None:
if self.is_adult or self.getConfig("is_adult"):
# We tried the default and still got a warning, so
# let's pull the warning number from the 'continue'
# link and reload data.
addurl = m.group(1)
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+addurl
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
else:
raise exceptions.AdultCheckRequired(self.url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
# use BeautifulSoup HTML parser to make everything easier to find.
soup = bs.BeautifulSoup(data)
# print data
# Now go hunting for all the meta data and the chapter list.
## Title and author
a = soup.find('div', {'id' : 'pagetitle'})
aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',aut['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/atp/'+aut['href'])
self.story.setMetadata('author',aut.string)
aut.extract()
self.story.setMetadata('title',a.string[:(len(a.string)-3)])
# Find the chapters:
chapters=soup.find('select')
if chapters != None:
for chapter in chapters.findAll('option'):
# just in case there's tags, like <i> in chapter titles.
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value']))
else:
self.chapterUrls.append((self.story.getMetadata('title'),url))
self.story.setMetadata('numChapters',len(self.chapterUrls))
asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')))
for list in asoup.findAll('div', {'class' : re.compile('listbox\s+')}):
a = list.find('a')
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
break
# eFiction sites don't help us out a lot with their meta data
# formatting, so it's a little ugly.
# utility method
def defaultGetattr(d,k):
try:
return d[k]
except:
return ""
# <span class="label">Rated:</span> NC-17<br /> etc
labels = list.findAll('span', {'class' : 'classification'})
for labelspan in labels:
label = labelspan.string
value = labelspan.nextSibling
if 'Summary' in label:
## Everything until the next span class='label'
svalue = ""
while not defaultGetattr(value,'class') == 'classification' and value != None:
if "Featured Stories" not in value:
svalue += str(value)
value = value.nextSibling
self.setDescription(url,svalue)
#self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label:
self.story.setMetadata('rating', value[:len(value)-2])
if 'Word count' in label:
self.story.setMetadata('numWords', value)
if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+'))
for cat in cats:
self.story.addToList('category',cat.string)
if 'Characters' in label:
for char in value.string.split(', '):
if not 'None' in char:
self.story.addToList('characters',char)
if 'Genre' in label:
for genre in value.string.split(', '):
if not 'None' in genre:
self.story.addToList('genre',genre)
if 'Warnings' in label:
for warning in value.string.split(', '):
if not 'None' in warning:
self.story.addToList('warnings',warning)
if 'Completed' in label:
if 'Yes' in value:
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')
if 'Published' in label:
self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' ::')[0]), self.dateformat))
if 'Updated' in label:
# there's a stray [ at the end.
#value = value[0:-1]
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
try:
if list.find('a', href=re.compile(r"series.php")) != None:
for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")):
# Find Series name from series URL.
series_url = 'http://'+self.host+'/atp/'+series['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
name=seriessoup.find('div', {'id' : 'pagetitle'})
name.find('a').extract()
self.setSeries(name.text.split(' by[')[0], i)
i=0
break
i+=1
if i == 0:
break
except:
# I find it hard to care if the series parsing fails
pass
# grab the text for an individual chapter.
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
div = soup.find('div', {'id' : 'story'})
if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,div)
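On the decode list at the top of this new adapter: Windows-1252 is a superset of ISO-8859-1, so listing it first covers sites that declare Latin-1 (or even utf8) but actually serve 1252. A sketch of the fallback loop such a list implies; the real helper lives in the base adapter, so this stand-in is an assumption:

    def _decode(data, encodings=("Windows-1252", "utf8")):
        for enc in encodings:
            try:
                return data.decode(enc)    # first clean decode wins
            except UnicodeDecodeError:
                pass
        return data.decode(encodings[0], 'replace')   # last resort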

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -216,7 +217,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -94,7 +95,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -114,7 +115,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -231,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,11 +48,11 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1])
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.section=self.parsedUrl.path.split('/',)[1]
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/'+self.story.getMetadata('section')+'/viewstory.php?sid='+self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/'+self.section+'/viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','sghp')
@ -59,7 +60,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# If all stories from the site fall into the same category,
# the site itself isn't likely to label them as such, so we
# do. Can't use extracategories, could be Atlantis or SG-1
if 'atlantis' in self.story.getMetadata('section'):
if 'atlantis' in self.section:
self.story.addToList("category","Stargate: Atlantis")
else:
self.story.addToList("category","Stargate: SG-1")
@ -95,7 +96,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -115,7 +116,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -149,7 +150,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+chapter['href']+addurl))
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl))
self.story.setMetadata('numChapters',len(self.chapterUrls))
@ -220,7 +221,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = 'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+a['href']
series_url = 'http://'+self.host+'/'+self.section+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
@ -242,7 +243,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
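The recurring edit in this file (and in GrangerEnchantedCom earlier) moves 'section' off story metadata onto a plain adapter attribute, since it is URL plumbing rather than story data. The resulting shape, condensed from the hunks above:

    self.section = self.parsedUrl.path.split('/')[1]    # e.g. 'atlantis'
    self._setURL('http://' + self.getSiteDomain() + '/' + self.section
                 + '/viewstory.php?sid=' + self.story.getMetadata('storyId'))
    if 'atlantis' in self.section:
        self.story.addToList("category", "Stargate: Atlantis")
    else:
        self.story.addToList("category", "Stargate: SG-1")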

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -85,7 +86,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -105,7 +106,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -235,7 +236,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -82,7 +83,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# sites skip that for one-chapter stories.
# Except it doesn't this time. :-/
url = self.url #+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -224,7 +225,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
# soup = bs.BeautifulSoup(self._fetchUrl(url))
# BeautifulSoup objects to <p> inside <span>, which

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -58,7 +59,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -90,7 +91,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -223,7 +224,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -213,7 +214,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -133,7 +134,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class SVUFictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -258,7 +259,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -43,7 +44,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -81,13 +82,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -102,7 +103,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -116,7 +117,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
# need to log in for this one.
addurl = "&ageconsent=ok&warning=4"
url = self.url+'&index=1'+addurl
logging.debug("Changing URL: "+url)
logger.debug("Changing URL: "+url)
self.performLogin(url)
data = self._fetchUrl(url)
@ -229,7 +230,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -18,6 +18,7 @@
import datetime
import time
import logging
logger = logging.getLogger(__name__)
from .. import BeautifulSoup as bs
from .. import exceptions
@ -48,7 +49,7 @@ class TestSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")):
logging.warn("self.is_adult:%s"%self.is_adult)
logger.warn("self.is_adult:%s"%self.is_adult)
raise exceptions.AdultCheckRequired(self.url)
if self.story.getMetadata('storyId') == '666':
@ -128,7 +129,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
self.story.addToList('genre','Fantasy')
self.story.addToList('genre','Comedy')
self.story.addToList('genre','SF')
self.story.addToList('genre','Sci-Fi')
self.story.addToList('genre','Noir')
self.story.addToList('characters','Bob Smith')
@ -184,7 +185,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
if self.story.getMetadata('storyId') == '667':
raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -198,7 +199,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -182,7 +183,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -71,7 +72,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -117,13 +118,13 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -145,7 +146,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -292,7 +293,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,20 +48,20 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
if self.parsedUrl.path.split('/',)[1] == 'wiktt':
self.story.addToList("category","Harry Potter")
self.story.setMetadata('section','/wiktt/efiction/')
self.section='/wiktt/efiction/'
self.dateformat = "%m/%d/%Y"
else:
self.story.addToList("category","Originals")
self.story.setMetadata('section','/efiction/')
self.section='/efiction/'
self.dateformat = "%b %d, %Y"
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'viewstory.php?sid='+self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + self.section + 'viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','msq')
@ -98,14 +99,14 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -127,7 +128,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -152,7 +153,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -186,7 +187,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
# just in case there's tags, like <i> in chapter titles.
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.story.getMetadata('section') + chapter['href']+addurl))
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.section + chapter['href']+addurl))
self.story.setMetadata('numChapters',len(self.chapterUrls))
@ -258,7 +259,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1')
@ -91,13 +92,13 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "My Account Page" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -110,7 +111,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -223,7 +224,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the part. Replace all to remove it usually.
@ -101,13 +102,13 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -129,7 +130,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -272,7 +273,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -43,7 +44,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -81,13 +82,13 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -102,7 +103,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -235,7 +236,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time
@ -40,7 +41,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL("http://"+self.getSiteDomain()\
+"/Story-"+self.story.getMetadata('storyId'))
@ -81,7 +82,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
return
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['urealname']))
## need to pull empty login page first to get ctkn and
@ -98,7 +99,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
d = self._fetchUrl(loginUrl, params)
if "Stories Published" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -110,7 +111,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url=self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# tth won't send you future updates if you aren't 'caught up'
# on the story. Login isn't required for F21, but logging in will
@ -137,7 +138,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
form = soup.find('form', {'id':'sitemaxratingform'})
params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
'sitemaxrating':'5'}
logging.info("Attempting to get rating cookie for %s" % url)
logger.info("Attempting to get rating cookie for %s" % url)
data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
# refetch story page.
data = self._fetchUrl(url)
@ -158,7 +159,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from author list page.
infourl = 'http://'+self.host+ainfo['href']
logging.debug("**StoryInfo** URL: "+infourl)
logger.debug("**StoryInfo** URL: "+infourl)
infodata = self._fetchUrl(infourl)
infosoup = bs.BeautifulSoup(infodata)
@ -175,14 +176,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from *primary* author list page.
logging.debug("**AUTHOR** URL: "+authorurl)
logger.debug("**AUTHOR** URL: "+authorurl)
authordata = self._fetchUrl(authorurl)
descurl=authorurl
authorsoup = bs.BeautifulSoup(authordata)
# author can have several pages, scan until we find it.
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
logging.debug("**AUTHOR** nextpage URL: "+nextpage)
logger.debug("**AUTHOR** nextpage URL: "+nextpage)
authordata = self._fetchUrl(nextpage)
descurl=nextpage
authorsoup = bs.BeautifulSoup(authordata)
@ -259,7 +260,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url))
div = soup.find('div', {'id' : 'storyinnerbody'})

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL. http://www.twilightarchives.com/read/9353
self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId'))
@ -77,7 +78,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -172,7 +173,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -42,7 +43,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -83,13 +84,13 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -99,7 +100,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -225,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -43,7 +44,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -84,13 +85,13 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -109,7 +110,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -252,7 +253,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -45,7 +46,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -84,7 +85,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -215,7 +216,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -56,7 +57,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# - get chapter list, if not one-shot.
url = self.url+'&chapter=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
@ -69,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# pull title(title) and author from the HTML title.
title = soup.find('title').string
logging.debug('Title: %s' % title)
logger.debug('Title: %s' % title)
title = title.split('::')[1].strip()
self.story.setMetadata('title',title.split(' by ')[0].strip())
self.story.setMetadata('author',title.split(' by ')[1].strip())
@ -109,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font>
# </td></tr></table>
logging.debug("Author URL: "+self.story.getMetadata('authorUrl'))
logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
selfClosingTags=('br')) # normalize <br> tags to <br />
# find this story in the list, parse its metadata based on
@ -212,7 +213,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -286,7 +287,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -77,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -203,7 +204,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -86,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -209,7 +210,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url))

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -54,7 +55,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,7 +93,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -126,7 +127,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# explicitly put ageconsent because google appengine regexp doesn't include it for some reason.
addurl = addurl.replace("&amp;","&")+'&ageconsent=ok'
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -147,7 +148,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# while len(loopdata) > 0:
# if len(loopdata) < 5000:
# chklen = len(loopdata)
# logging.info("loopdata: %s" % loopdata[:chklen])
# logger.info("loopdata: %s" % loopdata[:chklen])
# loopdata = loopdata[chklen:]
# use BeautifulSoup HTML parser to make everything easier to find.
@ -270,7 +271,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -27,6 +27,8 @@ from functools import partial
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
logger = logging.getLogger(__name__)
try:
from google.appengine.api import apiproxy_stub_map
def urlfetch_timeout_hook(service, call, request, response):
@ -38,10 +40,10 @@ try:
apiproxy_stub_map.apiproxy.GetPreCallHooks().Append(
'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch')
logging.info("Hook to make default deadline 10.0 installed.")
logger.info("Hook to make default deadline 10.0 installed.")
except:
pass
#logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
#logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
from ..story import Story
from ..gziphttp import GZipProcessor
@ -125,7 +127,7 @@ class BaseSiteAdapter(Configurable):
#print code
if code == "auto":
if not chardet:
logging.info("chardet not available, skipping 'auto' encoding")
logger.info("chardet not available, skipping 'auto' encoding")
continue
detected = chardet.detect(data)
#print detected
@ -133,12 +135,11 @@ class BaseSiteAdapter(Configurable):
code=detected['encoding']
else:
continue
logging.debug("try code:"+code)
return data.decode(code)
except:
logging.debug("code failed:"+code)
logger.debug("code failed:"+code)
pass
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
@ -175,10 +176,10 @@ class BaseSiteAdapter(Configurable):
return self._decode(self._fetchUrlRaw(url,parameters))
except Exception, e:
excpt=e
logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
logging.error("Giving up on %s" %url)
logging.exception(excpt)
logger.error("Giving up on %s" %url)
logger.exception(excpt)
raise(excpt)
# Limit chapters to download. Input starts at 1, list starts at 0
@ -304,7 +305,7 @@ class BaseSiteAdapter(Configurable):
if not fetch:
fetch=self._fetchUrlRaw
acceptable_attributes = ['href','name']
acceptable_attributes = ['href','name','class','id']
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt','longdesc'))
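
The widened whitelist matters because the chapter sanitizer deletes every tag attribute it does not recognize; before this change, class and id were stripped too, leaving output_css rules nothing to target. A sketch of the whitelist pass, assuming the BeautifulSoup 3 style API the plugin bundles as bs:

from BeautifulSoup import BeautifulSoup  # BS3-style API, as bundled

acceptable_attributes = ['href', 'name', 'class', 'id']
soup = BeautifulSoup('<p id="a1" style="color:red"><a href="/x" onclick="evil()">x</a></p>')
for tag in soup.findAll(True):
    # materialize the list first, then delete everything off-whitelist
    for attr in [a for a, v in tag.attrs if a not in acceptable_attributes]:
        del tag[attr]
print(soup)  # -> <p id="a1"><a href="/x">x</a></p>
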
@ -356,7 +357,6 @@ class BaseSiteAdapter(Configurable):
def cachedfetch(realfetch,cache,url):
if url in cache:
print("cache hit")
return cache[url]
else:
return realfetch(url)

View file

@ -134,7 +134,7 @@ class Configurable(object):
return self.configuration.hasConfig(key)
def getConfig(self, key, default=""):
return self.configuration.getConfig(key)
return self.configuration.getConfig(key,default)
def getConfigList(self, key):
return self.configuration.getConfigList(key)

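
Small as it looks, the one-line change above is a real fix: Configurable.getConfig accepted a default argument but never forwarded it, so callers always got the underlying Configuration fallback (an empty string). A toy reproduction, assuming a dict-backed configuration purely for illustration:

class Configuration(object):
    def __init__(self, values):
        self.values = values
    def getConfig(self, key, default=""):
        return self.values.get(key, default)

class Configurable(object):
    def __init__(self, configuration):
        self.configuration = configuration
    def getConfig(self, key, default=""):
        # before the fix this was: return self.configuration.getConfig(key)
        return self.configuration.getConfig(key, default)

c = Configurable(Configuration({}))
print(c.getConfig("dateUpdated_format", "%Y-%m-%d"))  # now '%Y-%m-%d', previously ''

The date-format defaults added to story.py below depend on exactly this behavior.
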
View file

@ -20,6 +20,8 @@ import urlparse
import string
from math import floor
from functools import partial
import logging
import urlparse as up
import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
@ -52,7 +54,7 @@ try:
if export:
return (img.export('JPG'),'jpg','image/jpeg')
else:
print("image used unchanged")
logging.debug("image used unchanged")
return (data,'jpg','image/jpeg')
except:
@ -88,23 +90,34 @@ except:
img.save(outsio,'JPEG')
return (outsio.getvalue(),'jpg','image/jpeg')
else:
print("image used unchanged")
logging.debug("image used unchanged")
return (data,'jpg','image/jpeg')
except:
# No calibre or PIL, simple pass through with mimetype.
imagetypes = {
'jpg':'image/jpeg',
'jpeg':'image/jpeg',
'png':'image/png',
'gif':'image/gif',
'svg':'image/svg+xml',
}
def convert_image(url,data,sizes,grayscale):
ext=url[url.rfind('.')+1:].lower()
return (data,ext,imagetypes[ext])
return no_convert_image(url,data)
imagetypes = {
'jpg':'image/jpeg',
'jpeg':'image/jpeg',
'png':'image/png',
'gif':'image/gif',
'svg':'image/svg+xml',
}
## also used for explicit no image processing.
def no_convert_image(url,data):
parsedUrl = up.urlparse(url)
ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()
if ext not in imagetypes:
logging.debug("no_convert_image url:%s - no known extension"%url)
# doesn't have extension? use jpg.
ext='jpg'
return (data,ext,imagetypes[ext])
def normalize_format_name(fmt):
if fmt:
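
Without Calibre or PIL, the new no_convert_image pass-through still has to pick a MIME type. Note that it parses only the URL's path, so query strings cannot leak into the extension, and anything unrecognized falls back to jpg. The extension logic in isolation:

import urlparse

imagetypes = {'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'png': 'image/png',
              'gif': 'image/gif', 'svg': 'image/svg+xml'}

def guess_ext(url):
    path = urlparse.urlparse(url).path
    ext = path[path.rfind('.') + 1:].lower()
    return ext if ext in imagetypes else 'jpg'  # unknown or missing extension: assume jpg

print(guess_ext('http://example.com/covers/img.PNG?width=600'))  # -> 'png'
print(guess_ext('http://example.com/image.php?id=9'))            # -> 'jpg'
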
@ -240,24 +253,35 @@ class Story(Configurable):
## Three part effect only those key(s) lists.
## pattern=>replacement
## metakey,metakey=>pattern=>replacement
## *Five* part lines. Effect only when trailing conditional key=>regexp matches
## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
def setReplace(self,replace):
for line in replace.splitlines():
if "&&" in line:
(line,conditional) = map( lambda x: x.strip(), line.split("&&") )
condparts = map( lambda x: x.strip(), conditional.split("=>") )
else:
condparts=[None,None]
if "=>" in line:
parts = map( lambda x: x.strip(), line.split("=>") )
if len(parts) > 2:
parts[0] = map( lambda x: x.strip(), parts[0].split(",") )
self.replacements.append(parts)
self.replacements.append(parts+condparts)
else:
self.replacements.append([None]+parts)
self.replacements.append([None]+parts+condparts)
def doReplacments(self,value,key):
for (keys,p,v) in self.replacements:
for (keys,regexp,replacement,condkey,condregexp) in self.replacements:
if (keys == None or key in keys) \
and isinstance(value,basestring) \
and re.search(p,value):
#pv=value
value = re.sub(p,v,value)
#print("change:%s => %s === %s => %s "%(p,v,pv,value))
and re.search(regexp,value):
doreplace=True
if condkey:
condval = self.getMetadata(condkey)
doreplace = condval != None and re.search(condregexp,condval)
if doreplace:
value = re.sub(regexp,replacement,value)
return value
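
The replacement syntax now has three shapes: pattern=>replacement applies everywhere, metakey,metakey=>pattern=>replacement restricts it to the listed keys, and the new &&metakey=>regexp tail makes the whole rule conditional on another metadata value. A hypothetical rule worked through by hand (rule text and metadata invented for illustration):

import re

metadata = {'site': 'twilighted.net'}
rule = 'category=>^Twilight$=>Twilight Saga&&site=>twilighted'

(line, conditional) = [x.strip() for x in rule.split('&&')]
(condkey, condregexp) = [x.strip() for x in conditional.split('=>')]
(keys, regexp, replacement) = [x.strip() for x in line.split('=>')]

value = 'Twilight'
if re.search(regexp, value) and re.search(condregexp, metadata.get(condkey, '')):
    value = re.sub(regexp, replacement, value)
print(value)  # -> 'Twilight Saga', but only because site matched 'twilighted'
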
def getMetadataRaw(self,key):
@ -280,7 +304,9 @@ class Story(Configurable):
value = commaGroups(value)
if key == "numChapters":
value = commaGroups("%d"%value)
if key in ("dateCreated","datePublished","dateUpdated"):
if key in ("dateCreated"):
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S"))
if key in ("datePublished","dateUpdated"):
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
if doreplacements:
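
Splitting the date keys gives each its own default: dateCreated keeps a full timestamp while datePublished and dateUpdated default to date-only, and any of the three can be overridden with a <key>_format entry in the ini. Roughly:

import datetime

config = {'dateUpdated_format': '%d.%m.%Y'}  # assumed personal.ini override
value = datetime.datetime(2012, 10, 21, 9, 16, 12)

print(value.strftime(config.get('dateCreated_format', '%Y-%m-%d %H:%M:%S')))  # 2012-10-21 09:16:12
print(value.strftime(config.get('dateUpdated_format', '%Y-%m-%d')))           # 21.10.2012
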
@ -411,11 +437,14 @@ class Story(Configurable):
title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title)
self.chapters.append( (title,html) )
def getChapters(self):
def getChapters(self,fortoc=False):
"Chapters will be tuples of (title,html)"
retval = []
if self.getConfig('add_chapter_numbers') and \
self.getConfig('chapter_title_add_pattern'):
## only add numbers if more than one chapter.
if len(self.chapters) > 1 and \
(self.getConfig('add_chapter_numbers') == "true" \
or (self.getConfig('add_chapter_numbers') == "toconly" and fortoc)) \
and self.getConfig('chapter_title_add_pattern'):
for index, (title,html) in enumerate(self.chapters):
retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
else:
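
add_chapter_numbers is no longer a plain boolean: "true" numbers chapter headings everywhere, the new "toconly" value numbers them only when a writer asks for the TOC flavor via fortoc=True, and single-chapter stories are now left untouched either way. A condensed sketch of the decision:

import string

chapters = [('A Beginning', '<p/>'), ('An End', '<p/>')]
pattern = string.Template('${index}. ${title}')  # e.g. chapter_title_add_pattern:${index}. ${title}

def getChapters(fortoc=False, add_chapter_numbers='toconly'):
    if len(chapters) > 1 and (add_chapter_numbers == 'true' or
                              (add_chapter_numbers == 'toconly' and fortoc)):
        return [(pattern.substitute({'index': i + 1, 'title': t}), h)
                for i, (t, h) in enumerate(chapters)]
    return chapters

print([t for t, h in getChapters(fortoc=True)])   # ['1. A Beginning', '2. An End']
print([t for t, h in getChapters(fortoc=False)])  # ['A Beginning', 'An End']
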
@ -480,17 +509,22 @@ class Story(Configurable):
prefix='ffdl'
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
try:
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
if self.getConfig('no_image_processing'):
(data,ext,mime) = no_convert_image(imgurl,
fetch(imgurl))
else:
try:
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
except Exception, e:
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
(data,ext,mime) = convert_image(imgurl,
fetch(imgurl),
sizes,
self.getConfig('grayscale_images'))
except Exception, e:
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
try:
(data,ext,mime) = convert_image(imgurl,
fetch(imgurl),
sizes,
self.getConfig('grayscale_images'))
except Exception, e:
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
return "failedtoload"
# explicit cover, make the first image.
@ -525,7 +559,7 @@ class Story(Configurable):
ext)
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
else:
newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc']

View file

@ -18,6 +18,7 @@
import re
import os.path
import datetime
import string
import StringIO
import zipfile
from zipfile import ZipFile, ZIP_DEFLATED
@ -26,6 +27,8 @@ import logging
from ..configurable import Configurable
from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML
logger = logging.getLogger(__name__)
class BaseStoryWriter(Configurable):
@staticmethod
@ -101,6 +104,22 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use label and value.
"""
if self.getConfig("include_titlepage"):
if self.hasConfig("titlepage_start"):
START = string.Template(self.getConfig("titlepage_start"))
if self.hasConfig("titlepage_entry"):
ENTRY = string.Template(self.getConfig("titlepage_entry"))
if self.hasConfig("titlepage_end"):
END = string.Template(self.getConfig("titlepage_end"))
if self.hasConfig("titlepage_wide_entry"):
WIDE_ENTRY = string.Template(self.getConfig("titlepage_wide_entry"))
if self.hasConfig("titlepage_no_title_entry"):
NO_TITLE_ENTRY = string.Template(self.getConfig("titlepage_no_title_entry"))
self._write(out,START.substitute(self.story.getAllMetadata()))
if WIDE_ENTRY==None:
@ -120,11 +139,11 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
logger.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
@ -132,6 +151,7 @@ class BaseStoryWriter(Configurable):
TEMPLATE= NO_TITLE_ENTRY
self._write(out,TEMPLATE.substitute({'label':label,
'id':entry,
'value':self.story.getMetadata(entry)}))
else:
self._write(out, entry)
@ -146,11 +166,22 @@ class BaseStoryWriter(Configurable):
"""
# Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
if self.hasConfig("tocpage_start"):
START = string.Template(self.getConfig("tocpage_start"))
if self.hasConfig("tocpage_entry"):
ENTRY = string.Template(self.getConfig("tocpage_entry"))
if self.hasConfig("tocpage_end"):
END = string.Template(self.getConfig("tocpage_end"))
self._write(out,START.substitute(self.story.getAllMetadata()))
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
self._write(out,ENTRY.substitute({'chapter':title,
'number':index+1,
'index':"%04d"%(index+1)}))
self._write(out,END.substitute(self.story.getAllMetadata()))
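
With tocpage_start, tocpage_entry and tocpage_end now overridable from the ini, a custom entry template has three substitutions available: ${chapter} for the title, ${number} as a plain 1-based count, and ${index} zero-padded to match the generated file names. For instance (the template text itself is illustrative):

import string

ENTRY = string.Template('<a href="file${index}.xhtml">${number}. ${chapter}</a><br />')

for index, (title, html) in enumerate([('One', '<p/>'), ('Two', '<p/>')]):
    if html:
        print(ENTRY.substitute({'chapter': title,
                                'number': index + 1,
                                'index': '%04d' % (index + 1)}))
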
@ -161,6 +192,8 @@ class BaseStoryWriter(Configurable):
if outfilename == None:
outfilename=self.getOutputFileName()
self.outfilename = outfilename
# minor cheat, tucking css into metadata.
if self.getConfig("output_css"):
self.story.setMetadata("output_css",
@ -171,11 +204,11 @@ class BaseStoryWriter(Configurable):
if not outstream:
close=True
logging.info("Save directly to file: %s" % outfilename)
logger.info("Save directly to file: %s" % outfilename)
if self.getConfig('make_directories'):
path=""
dirs = os.path.dirname(outfilename).split('/')
for dir in dirs:
outputdirs = os.path.dirname(outfilename).split('/')
for dir in outputdirs:
path+=dir+"/"
if not os.path.exists(path):
os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2?
@ -198,7 +231,7 @@ class BaseStoryWriter(Configurable):
outstream = open(outfilename,"wb")
else:
close=False
logging.debug("Save to stream")
logger.debug("Save to stream")
if not metaonly:
self.story = self.adapter.getStory() # get full story now,
@ -209,14 +242,14 @@ class BaseStoryWriter(Configurable):
# fetch once.
if self.getConfig('zip_output'):
out = StringIO.StringIO()
self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
self.writeStoryImpl(out)
zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
zipout.writestr(self.getBaseFileName(),out.getvalue())
self.zipout.writestr(self.getBaseFileName(),out.getvalue())
# declares all the files created by Windows. otherwise, when
# it runs in appengine, windows unzips the files as 000 perms.
for zf in zipout.filelist:
for zf in self.zipout.filelist:
zf.create_system = 0
zipout.close()
self.zipout.close()
out.close()
else:
self.writeStoryImpl(outstream)
@ -224,6 +257,27 @@ class BaseStoryWriter(Configurable):
if close:
outstream.close()
def writeFile(self, filename, data):
logger.debug("writeFile:%s"%filename)
if self.getConfig('zip_output'):
outputdirs = os.path.dirname(self.getBaseFileName())
if outputdirs:
filename=outputdirs+'/'+filename
self.zipout.writestr(filename,data)
else:
outputdirs = os.path.dirname(self.outfilename)
if outputdirs:
filename=outputdirs+'/'+filename
dir = os.path.dirname(filename)
if not os.path.exists(dir):
os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2?
outstream = open(filename,"wb")
outstream.write(data)
outstream.close()
def writeStoryImpl(self, out):
"Must be overriden by sub classes."
pass

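
The new writeFile is what lets non-epub formats carry extra files such as images: with zip_output it drops them into the already-open zip beside the base file name, otherwise it writes them to disk next to the output file, creating the directory on demand. A standalone approximation of the zip branch, with hypothetical file names:

import os
import StringIO
from zipfile import ZipFile, ZIP_DEFLATED

buf = StringIO.StringIO()
zipout = ZipFile(buf, 'w', compression=ZIP_DEFLATED)
basefilename = 'Title-Author/story.html'  # hypothetical getBaseFileName() result

def writeFile(filename, data):
    outputdirs = os.path.dirname(basefilename)
    if outputdirs:
        filename = outputdirs + '/' + filename  # keep images beside the HTML inside the zip
    zipout.writestr(filename, data)

writeFile('images/ffdl-0.jpg', 'JPEGDATA')
zipout.close()
print(ZipFile(StringIO.StringIO(buf.getvalue())).namelist())  # ['Title-Author/images/ffdl-0.jpg']
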
View file

@ -29,6 +29,8 @@ from xml.dom.minidom import parse, parseString, getDOMImplementation
from base_writer import *
from ..htmlcleanup import stripHTML
logger = logging.getLogger(__name__)
class EpubWriter(BaseStoryWriter):
@staticmethod
@ -151,8 +153,16 @@ ${value}<br />
<h3>Update Log</h3>
''')
self.EPUB_LOG_UPDATE_START = string.Template('''
<p class='log_entry'>
''')
self.EPUB_LOG_ENTRY = string.Template('''
<b>${label}:</b> <span id="${id}">${value}</span>
''')
self.EPUB_LOG_UPDATE_END = string.Template('''
</p><hr />
''')
self.EPUB_LOG_PAGE_END = string.Template('''
@ -160,30 +170,50 @@ ${value}<br />
</html>
''')
self.EPUB_LOG_PAGE_END = string.Template('''
</body>
</html>
''')
self.EPUB_COVER = string.Template('''
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head><title>Cover</title><style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
div { margin: 0pt; padding: 0pt; }
</style></head><body><div>
<img src="${coverimg}" alt="cover"/>
</div></body></html>
''')
def writeLogPage(self, out):
"""
XXX
Write the log page, but only include entries that there's
metadata for. START, ENTRY and END are expected to already by
metadata for. START, ENTRY and END are expected to already be
string.Template(). START and END are expected to use the same
names as Story.metadata, but ENTRY should use id, label and value.
"""
if self.getConfig("include_logpage"):
if self.hasConfig("logpage_start"):
START = string.Template(self.getConfig("logpage_start"))
else:
START = self.EPUB_LOG_PAGE_START
# if there's a self.story.logfile, there's an existing log
# to add to.
if self.story.logfile:
print("existing logfile found, appending")
print("existing data:%s"%self._getLastLogData(self.story.logfile))
replace_string = "</body>" # "</h3>"
self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string))
else:
# otherwise, write a new one.
self._write(out,self.EPUB_LOG_PAGE_START.substitute(self.story.getAllMetadata()))
self._write(out,self._makeLogEntry())
self._write(out,self.EPUB_LOG_PAGE_END.substitute(self.story.getAllMetadata()))
if self.hasConfig("logpage_end"):
END = string.Template(self.getConfig("logpage_end"))
else:
END = self.EPUB_LOG_PAGE_END
# if there's a self.story.logfile, there's an existing log
# to add to.
if self.story.logfile:
logger.debug("existing logfile found, appending")
logger.debug("existing data:%s"%self._getLastLogData(self.story.logfile))
replace_string = "</body>" # "</h3>"
self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string))
else:
# otherwise, write a new one.
self._write(out,START.substitute(self.story.getAllMetadata()))
self._write(out,self._makeLogEntry())
self._write(out,END.substitute(self.story.getAllMetadata()))
# self parsing instead of Soup because it should be simple and not
# worth the overhead.
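
Two related changes meet in this hunk: every log-page fragment (logpage_start, logpage_entry, logpage_update_start, logpage_update_end, logpage_end) can now be overridden from the ini, and include_logpage grows a "smart" value, wired up further down in the epub writer, that only emits the page when there is an existing log to extend or the story is still in progress. The "smart" decision, reduced to a function:

def want_logpage(include_logpage, logfile, status):
    # 'smart': only when updating an existing log or the story is In-Progress
    return (include_logpage == 'smart' and (logfile or status == 'In-Progress')) \
        or include_logpage == 'true'

print(want_logpage('smart', None, 'Completed'))    # False
print(want_logpage('smart', None, 'In-Progress'))  # True
print(want_logpage('true', None, 'Completed'))     # True
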
@ -206,7 +236,22 @@ ${value}<br />
return values
def _makeLogEntry(self, oldvalues={}):
retval = "<p class='log_entry'>"
if self.hasConfig("logpage_update_start"):
START = string.Template(self.getConfig("logpage_update_start"))
else:
START = self.EPUB_LOG_UPDATE_START
if self.hasConfig("logpage_entry"):
ENTRY = string.Template(self.getConfig("logpage_entry"))
else:
ENTRY = self.EPUB_LOG_ENTRY
if self.hasConfig("logpage_update_end"):
END = string.Template(self.getConfig("logpage_update_end"))
else:
END = self.EPUB_LOG_UPDATE_END
retval = START.substitute(self.story.getAllMetadata())
for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
if self.isValidMetaEntry(entry):
@ -215,22 +260,22 @@ ${value}<br />
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
logger.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry,
'label':label,
'value':val})
retval = retval + ENTRY.substitute({'id':entry,
'label':label,
'value':val})
else:
# could be useful for introducing extra text, but
# mostly it makes it easy to tell when you get the
# keyword wrong.
retval = retval + entry
retval = retval + "</p><hr />"
retval = retval + END.substitute(self.story.getAllMetadata())
if self.getConfig('replace_hr'):
retval = retval.replace("<hr />","<div class='center'>* * *</div>")
@ -368,9 +413,9 @@ ${value}<br />
guide = None
coverIO = None
imgid = "image0000"
coverimgid = "image0000"
if not self.story.cover and self.story.oldcover:
print("writer_epub: no new cover, has old cover, write image.")
logger.debug("writer_epub: no new cover, has old cover, write image.")
(oldcoverhtmlhref,
oldcoverhtmltype,
oldcoverhtmldata,
@ -380,8 +425,8 @@ ${value}<br />
outputepub.writestr(oldcoverhtmlhref,oldcoverhtmldata)
outputepub.writestr(oldcoverimghref,oldcoverimgdata)
imgid = "image0"
items.append((imgid,
coverimgid = "image0"
items.append((coverimgid,
oldcoverimghref,
oldcoverimgtype,
None))
@ -406,6 +451,10 @@ ${value}<br />
imgmap['mime'],
None))
imgcount+=1
if 'cover' in imgfile:
# make sure coverimgid is set to the cover, not
# just the first image.
coverimgid = items[-1][0]
items.append(("style","OEBPS/stylesheet.css","text/css",None))
@ -417,7 +466,7 @@ ${value}<br />
itemrefs.append("cover")
#
# <meta name="cover" content="cover.jpg"/>
metadata.appendChild(newTag(contentdom,"meta",{"content":"image0000",
metadata.appendChild(newTag(contentdom,"meta",{"content":coverimgid,
"name":"cover"}))
# cover stuff for later:
# at end of <package>:
@ -429,16 +478,12 @@ ${value}<br />
"title":"Cover",
"href":"OEBPS/cover.xhtml"}))
if self.hasConfig("cover_content"):
COVER = string.Template(self.getConfig("cover_content"))
else:
COVER = self.EPUB_COVER
coverIO = StringIO.StringIO()
coverIO.write('''
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head><title>Cover</title><style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
div { margin: 0pt; padding: 0pt; }
</style></head><body><div>
<img src="%s" alt="cover"/>
</div></body></html>
'''%self.story.cover)
coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
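
The hard-coded cover XHTML becomes the EPUB_COVER default, overridable through cover_content; the substitution dict is simply the story metadata plus one extra coverimg key (Python 2 items() returns lists, so + concatenates them). In miniature, with stand-in values:

import string

COVER = string.Template('<div><img src="${coverimg}" alt="cover"/><!-- ${title} --></div>')
metadata = {'title': 'Some Story'}       # stand-in for story.getAllMetadata()
cover = 'OEBPS/images/cover_ffdl-0.jpg'  # stand-in for story.cover

print(COVER.substitute(dict(metadata.items() + {'coverimg': cover}.items())))
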
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
@ -447,11 +492,15 @@ div { margin: 0pt; padding: 0pt; }
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page")
if self.getConfig("include_logpage"):
dologpage = ( self.getConfig("include_logpage") == "smart" and \
(self.story.logfile or self.story.getMetadataRaw("status") == "In-Progress") ) \
or self.getConfig("include_logpage") == "true"
if dologpage:
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page")
for index, (title,html) in enumerate(self.story.getChapters()):
for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)):
if html:
i=index+1
items.append(("file%04d"%i,
@ -483,8 +532,8 @@ div { margin: 0pt; padding: 0pt; }
contentxml = contentdom.toxml(encoding='utf-8')
# tweak for brain damaged Nook STR. Nook insists on name before content.
contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%imgid,
'<meta name="cover" content="%s"/>'%imgid)
contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%coverimgid,
'<meta name="cover" content="%s"/>'%coverimgid)
outputepub.writestr("content.opf",contentxml)
contentdom.unlink()
@ -582,17 +631,28 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue())
tocpageIO.close()
# write log page.
logpageIO = StringIO.StringIO()
self.writeLogPage(logpageIO)
if logpageIO.getvalue(): # will be false if no log page.
if dologpage:
# write log page.
logpageIO = StringIO.StringIO()
self.writeLogPage(logpageIO)
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()
logpageIO.close()
if self.hasConfig('chapter_start'):
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
else:
CHAPTER_START = self.EPUB_CHAPTER_START
if self.hasConfig('chapter_end'):
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
else:
CHAPTER_END = self.EPUB_CHAPTER_END
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
logger.debug('Writing chapter text for: %s' % title)
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
# ffnet(& maybe others) gives the whole chapter text
# as one line. This causes problems for nook(at
# least) when the chapter size starts getting big
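The chapter_start/chapter_end override introduced above recurs in each writer below (HTML, mobi, text): a template from the user's config, when present, replaces the writer's built-in constant, and every format now substitutes the same three keys. A minimal standalone sketch of the pattern, with illustrative names rather than the plugin's own:

import string

DEFAULT_CHAPTER_START = string.Template('<h2>${chapter}</h2>')  # stand-in default

def chapter_start_template(get_config):
    # a configured template wins over the writer's built-in constant
    configured = get_config('chapter_start')
    return string.Template(configured) if configured else DEFAULT_CHAPTER_START

# all writers now pass the same keys: zero-padded index, raw number, title
vals = {'chapter': 'The Siege', 'index': '%04d' % 3, 'number': 3}
print(chapter_start_template(lambda key: None).substitute(vals))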

View file

@ -46,6 +46,10 @@ ${output_css}
<h1><a href="${storyUrl}">${title}</a> by ${authorHTML}</h1>
''')
self.HTML_COVER = string.Template('''
<img src="${coverimg}" alt="cover" />
''')
self.HTML_TITLE_PAGE_START = string.Template('''
<table class="full">
''')
@ -75,6 +79,8 @@ ${output_css}
<a name="section${index}"><h2>${chapter}</h2></a>
''')
self.HTML_CHAPTER_END = string.Template('')
self.HTML_FILE_END = string.Template('''
</body>
</html>''')
@ -82,8 +88,26 @@ ${output_css}
def writeStoryImpl(self, out):
self._write(out,self.HTML_FILE_START.substitute(self.story.getAllMetadata()))
if self.hasConfig("cover_content"):
COVER = string.Template(self.getConfig("cover_content"))
else:
COVER = self.HTML_COVER
if self.hasConfig('file_start'):
FILE_START = string.Template(self.getConfig("file_start"))
else:
FILE_START = self.HTML_FILE_START
if self.hasConfig('file_end'):
FILE_END = string.Template(self.getConfig("file_end"))
else:
FILE_END = self.HTML_FILE_END
self._write(out,FILE_START.substitute(self.story.getAllMetadata()))
if self.getConfig('include_images') and self.story.cover:
self._write(out,COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
self.writeTitlePage(out,
self.HTML_TITLE_PAGE_START,
self.HTML_TITLE_ENTRY,
@ -94,10 +118,27 @@ ${output_css}
self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END)
if self.hasConfig('chapter_start'):
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
else:
CHAPTER_START = self.HTML_CHAPTER_START
if self.hasConfig('chapter_end'):
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
else:
CHAPTER_END = self.HTML_CHAPTER_END
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
self._write(out,CHAPTER_START.substitute(vals))
self._write(out,html)
self._write(out,CHAPTER_END.substitute(vals))
self._write(out,self.HTML_FILE_END.substitute(self.story.getAllMetadata()))
self._write(out,FILE_END.substitute(self.story.getAllMetadata()))
if self.getConfig('include_images'):
for imgmap in self.story.getImgUrls():
self.writeFile(imgmap['newsrc'],imgmap['data'])
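Both the epub and HTML writers gain the same cover_content hook: when set, the configured text becomes a string.Template fed every metadata entry plus a coverimg key holding the in-book path of the cover image. A sketch of the substitution, with illustrative override text and metadata values:

import string

# illustrative override and metadata; coverimg is supplied by the writer
cover_content = '<div><img src="${coverimg}" alt="Cover of ${title}" /></div>'
vals = {'title': 'Some Story', 'author': 'Someone', 'coverimg': 'OEBPS/cover.jpg'}
print(string.Template(cover_content).substitute(vals))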

View file

@ -88,27 +88,6 @@ ${value}<br />
self.MOBI_TABLE_TITLE_PAGE_END = string.Template('''
</table>
</body>
</html>
''')
self.MOBI_TOC_PAGE_START = string.Template('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>${title} by ${author}</title>
</head>
<body>
<div>
<h3>Table of Contents</h3>
''')
self.MOBI_TOC_ENTRY = string.Template('''
<a href="file${index}.xhtml">${chapter}</a><br />
''')
self.MOBI_TOC_PAGE_END = string.Template('''
</div>
</body>
</html>
''')
@ -169,10 +148,21 @@ ${value}<br />
# files.append(tocpageIO.getvalue())
# tocpageIO.close()
if self.hasConfig('chapter_start'):
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
else:
CHAPTER_START = self.MOBI_CHAPTER_START
if self.hasConfig('chapter_end'):
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
else:
CHAPTER_END = self.MOBI_CHAPTER_END
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
# ffnet(& maybe others) gives the whole chapter text
# as one line. This causes problems for nook(at
# least) when the chapter size starts getting big

View file

@ -98,6 +98,7 @@ ${chapter}
\t${chapter}
''')
self.TEXT_CHAPTER_END = string.Template(u'')
self.TEXT_FILE_END = string.Template(u'''
@ -114,7 +115,17 @@ End file.
wrapout = KludgeStringIO()
wrapout.write(self.TEXT_FILE_START.substitute(self.story.getAllMetadata()))
if self.hasConfig("file_start"):
FILE_START = string.Template(self.getConfig("file_start"))
else:
FILE_START = self.TEXT_FILE_START
if self.hasConfig("file_end"):
FILE_END = string.Template(self.getConfig("file_end"))
else:
FILE_END = self.TEXT_FILE_END
wrapout.write(FILE_START.substitute(self.story.getAllMetadata()))
self.writeTitlePage(wrapout,
self.TEXT_TITLE_PAGE_START,
@ -133,13 +144,25 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap)))
if self.hasConfig('chapter_start'):
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
else:
CHAPTER_START = self.TEXT_CHAPTER_START
if self.hasConfig('chapter_end'):
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
else:
CHAPTER_END = self.TEXT_CHAPTER_END
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_START.substitute(vals)))))
self._write(out,self.lineends(html2text(html,wrap_width=self.wrap_width)))
self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_END.substitute(vals)))))
self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.getAllMetadata()))))
self._write(out,self.lineends(self.wraplines(FILE_END.substitute(self.story.getAllMetadata()))))
def wraplines(self, text):

View file

@ -54,10 +54,6 @@
much easier. </p>
</div>
<!-- put announcements here, h3 is a good title size. -->
<h3>New Fixes</h3>
<p>
New version containing some bug fixes, and a couple metadata features.
</p>
<p>
Questions? Check out our
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownloaderFAQs">FAQs</a>.
@ -66,7 +62,7 @@
If you have any problems with this application, please
report them in
the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The
<a href="http://4-4-25.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
<a href="http://4-4-28.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
</p>
<div id='error'>
{{ error_message }}
@ -543,7 +539,14 @@
Use the URL of the story's chapter list, such as
<br /><a href="http://thehookupzone.net/CriminalMinds/viewstory.php?sid=1234">http://thehookupzone.net/CriminalMinds/viewstory.php?sid=1234</a>
</dd>
<dt>www.qaf-fic.com</dt>
<dd>
Use the URL of the story's chapter list, such as
<br /><a href="http://www.qaf-fic.com/atp/viewstory.php?sid=1234">http://www.qaf-fic.com/atp/viewstory.php?sid=1234</a>
</dd>
</dl>
<p>
A few additional things to know, which will make your life substantially easier:
</p>

View file

@ -131,8 +131,10 @@ extratags: FanFiction
## for regexp details.
## Make sure to keep at least one space at the start of each line and
## to escape % to %%, if used.
## Two or three part lines. Two part lines affect everything.
## Two, three or five part lines. Two part lines affect everything.
## Three part lines affect only the listed key(s).
## *Five* part lines take effect only when the trailing conditional key=>regexp matches.
## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
#replace_metadata:
# genre,category=>Sci-Fi=>SF
# Puella Magi Madoka Magica.* => Madoka
@ -140,7 +142,9 @@ extratags: FanFiction
# Crossover: (.*)=>\1
# title=>(.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers
# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural
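A minimal sketch of how one of these five-part lines could be evaluated; the parsing and the story dict below are illustrative, not the plugin's actual implementation:

import re

line = r'characters=>Sam W\.=>Sam Winchester&&category=>Supernatural'

body, _, cond = line.partition('&&')            # optional trailing conditional
keys, pattern, replacement = body.split('=>')
condkey, _, condregexp = cond.partition('=>')

story = {'characters': 'Sam W., Dean W.', 'category': 'Supernatural'}

# apply the replacement only if there is no conditional, or it matches
if not cond or re.search(condregexp, story.get(condkey, '')):
    for key in keys.split(','):
        story[key] = re.sub(pattern, replacement, story[key])

print(story['characters'])   # -> Sam Winchester, Dean W.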
## Some readers don't show horizontal rule (<hr />) tags correctly.
## This replaces them all with a centered '* * *'. (Note centering
## doesn't work on some devices either.)
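A sketch of the substitution this option describes; the exact markup FFDL emits may differ:

import re

html = 'scene one<hr />scene two'
# swap every horizontal rule for a centered '* * *' separator
print(re.sub(r'<hr[^>]*>', "<p style='text-align:center'>* * *</p>", html))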
@ -168,10 +172,15 @@ keep_summary_html:true
## Don't like the numbers at the start of chapter titles on some
## sites? You can use strip_chapter_numbers to strip them off. Just
## want to make them all look the same? Strip them off, then add them
## back on with add_chapter_numbers. Don't like the way it strips
## numbers or adds them back? See chapter_title_strip_pattern and
## chapter_title_add_pattern.
## back on with add_chapter_numbers:true. Only want them added back
## on for the Table of Contents (toc)? Use add_chapter_numbers:toconly.
## (toconly doesn't work on mobi output.) Don't like the way it
## strips numbers or adds them back? See chapter_title_strip_pattern
## and chapter_title_add_pattern.
strip_chapter_numbers:false
## add_chapter_numbers can be true, false or toconly
## (Note number is not added when there's only one chapter.)
add_chapter_numbers:false
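Roughly what strip-then-add normalization amounts to: the strip regexp below is illustrative, while the add template matches the chapter_title_add_pattern default further down.

import re, string

add_pattern = string.Template('${index}. ${title}')   # chapter_title_add_pattern
titles = ['1. Prologue', 'Chapter 2: The Siege', 'Epilogue']

for number, title in enumerate(titles, 1):
    # illustrative stand-in for chapter_title_strip_pattern
    stripped = re.sub(r'^(Chapter )?[0-9]+[.:]?\s*', '', title)
    print(add_pattern.substitute(index=number, title=stripped))
# -> "1. Prologue", "2. The Siege", "3. Epilogue"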
## (Two versions of chapter_title_strip_pattern are shown below. You
@ -202,6 +211,17 @@ chapter_title_add_pattern:${index}. ${title}
## Each output format has a section that overrides [defaults]
[html]
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
## include_images is *only* available in epub and html output formats.
## include_images is *not* available in the web service in any format.
#include_images:false
## This switch prevents FFDL from doing any processing on the images.
## Usually they would be converted to jpg, resized and optionally made
## grayscale.
no_image_processing: true
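For reference, the kind of processing no_image_processing:true skips, sketched with Pillow; the plugin's actual conversion code, and the width cap used here, are assumptions:

from io import BytesIO
from PIL import Image

def process_image(data, maxwidth=580, grayscale=False):
    img = Image.open(BytesIO(data))
    w, h = img.size
    if w > maxwidth:                        # shrink overly wide images
        img = img.resize((maxwidth, h * maxwidth // w))
    if grayscale:
        img = img.convert('L')              # optional grayscale
    out = BytesIO()
    img.convert('RGB').save(out, 'JPEG')    # convert to jpg for size
    return out.getvalue()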
## output background color--only used by html and epub (and ignored in
## epub by many readers). Included below in output_css--will be
## ignored if not in output_css.
@ -242,13 +262,17 @@ windows_eol: true
## mobi generated from epub by calibre will have a TOC at the end.
include_tocpage: false
## include an Update Log page before the story text. If included, the
## log will be updated each time the epub is an all the metadata
## include an Update Log page before the story text. If 'true', the
## log will be updated each time the epub is, and all the metadata
## fields that have changed since the last update (typically
## dateUpdated,numChapters,numWords at a minimum) will be shown.
## Great for tracking when chapters came out and when the description,
## etc changed.
include_logpage: false
## If set to 'smart', logpage will only be included if the story is
## status:In-Progress or already had a logpage. That way you don't
## end up with Completed stories that have just one logpage entry.
#include_logpage: smart
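The epub writer's test for this, restated from the dologpage change earlier in this commit:

def want_logpage(config_value, had_logpage, status):
    # 'true' always includes the page; 'smart' only for In-Progress
    # stories or ones that already carry a log page
    return (config_value == "true" or
            (config_value == "smart" and
             (had_logpage or status == "In-Progress")))

print(want_logpage("smart", False, "Completed"))    # -> False
print(want_logpage("smart", False, "In-Progress"))  # -> True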
## items to include in the log page. Empty metadata entries, or those
## that haven't changed since the last update, will *not* appear, even
@ -301,6 +325,7 @@ output_css:
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
## include_images is *only* available in epub and html output formats.
#include_images:false
## If set, the first image found will be made the cover image. If
@ -355,31 +380,31 @@ nook_img_fix:true
## URLs like: http://test1.com?sid=12345
[test1.com]
extratags: FanFiction,Testing
extracategories:Fafner
extragenres:Romance,Fluff
extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
extraships:Smythe-Smythe/Mokona
extrawarnings:Extreme Bogosity
# extracategories:Fafner
# extragenres:Romance,Fluff
# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
# extraships:Smythe-Smythe/Mokona
# extrawarnings:Extreme Bogosity
extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
include_in_compositeJ:dateCreated
include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
# include_in_compositeJ:dateCreated
# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
extra_subject_tags: metaA,metaB,metaC
# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
# extra_subject_tags: metaA,metaB,metaC
replace_metadata:
compositeL=>Val=>VALUE
series,extratags=>Test=>Plan
Puella Magi Madoka Magica.* => Madoka
Comedy=>Humor
Crossover: (.*)=>\1
(.*)Great(.*)=>\1Moderate\2
.*-Centered=>
characters=>Harry P\.=>Harry Potter
# replace_metadata:
# compositeL=>Val=>VALUE
# series,extratags=>Test=>Plan
# Puella Magi Madoka Magica.* => Madoka
# Comedy=>Humor
# Crossover: (.*)=>\1
# (.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
# characters=>Harry P\.=>Harry Potter
## If necessary, you can define [<site>:<format>] sections to
@ -557,9 +582,6 @@ cliches_label:Character Cliches
# themes=>#bcolumn,a
# timeline=>#ccolumn,n
## adds to include_subject_tags instead of replacing it.
#extra_subject_tags: themes,timeline,cliches
[erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter
@ -1015,6 +1037,15 @@ extracategories:Harry Potter
## Site dedicated to these categories/characters/ships
extracategories:Prison Break
[www.qaf-fic.com]
## Site dedicated to these categories/characters/ships
extracategories:Queer as Folk
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In the commandline version,
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
[www.scarvesandcoffee.net]
## Site dedicated to these categories/characters/ships
extracategories:Glee