Redo logging so it's more consistent across web/CLI/PI.

This commit is contained in:
Jim Miller 2012-10-14 14:01:27 -05:00
parent 4d96632b67
commit a0f16425d2
84 changed files with 463 additions and 365 deletions

View file

@ -14,7 +14,6 @@ from StringIO import StringIO
from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
@ -109,9 +108,6 @@ def do_download_for_worker(book,options):
when run as a worker job
'''
try:
# import logging
# logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
book['comment'] = 'Download started...'
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])

View file

@ -16,6 +16,11 @@
#
import logging
# suppresses default logger. Logging is setup in fanficdownload/__init__.py so it works in calibre, too.
logger = logging.getLogger()
loghandler=logging.NullHandler()
loghandler.setFormatter(logging.Formatter("(=====)%(levelname)s:%(message)s"))
logger.addHandler(loghandler)
import sys, os
from os.path import normpath, expanduser, isfile, join
@ -79,11 +84,9 @@ def main():
(options, args) = parser.parse_args()
if options.debug:
logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
else:
logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
if not options.debug:
logger = logging.getLogger("fanficdownloader")
logger.setLevel(logging.INFO)
if len(args) != 1:
parser.error("incorrect number of arguments")
@ -120,8 +123,6 @@ def main():
logging.debug('reading %s config file(s), if present'%conflist)
configuration.read(conflist)
print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))
try:
configuration.add_section("overrides")
except ConfigParser.DuplicateSectionError:

View file

@ -1 +1,15 @@
# -*- coding: utf-8 -*-
# Package-level logging setup for fanficdownloader.
try:
    # just a way to switch between web service and CLI/PI
    # If google.appengine.api imports cleanly we are running as the web
    # service, so leave logging configuration to the hosting environment.
    import google.appengine.api
except:
    # NOTE(review): bare except is presumably intentional — any failure to
    # import the App Engine API means "not the web service" — but
    # `except ImportError:` would be safer; confirm before narrowing.
    # CLI / calibre-plugin case: attach a handler to this package's logger
    # so DEBUG-and-up records are emitted (StreamHandler defaults to stderr).
    import logging
    logger = logging.getLogger(__name__)
    loghandler=logging.StreamHandler()
    loghandler.setFormatter(logging.Formatter("FFDL:%(levelname)s:%(filename)s(%(lineno)d):%(message)s"))
    logger.addHandler(loghandler)
    loghandler.setLevel(logging.DEBUG)
    logger.setLevel(logging.DEBUG)

View file

@ -20,6 +20,8 @@ from os.path import dirname, basename, normpath
import logging
import urlparse as up
logger = logging.getLogger(__name__)
from .. import exceptions as exceptions
## must import each adapter here.
@ -121,9 +123,9 @@ for x in imports():
def getAdapter(config,url):
logging.debug("trying url:"+url)
logger.debug("trying url:"+url)
(cls,fixedurl) = getClassFor(url)
logging.debug("fixedurl:"+fixedurl)
logger.debug("fixedurl:"+fixedurl)
if cls:
adapter = cls(config,fixedurl) # raises InvalidStoryURL
return adapter
@ -161,11 +163,11 @@ def getClassFor(url):
cls = getClassFromList(domain)
if not cls and domain.startswith("www."):
domain = domain.replace("www.","")
logging.debug("trying site:without www: "+domain)
logger.debug("trying site:without www: "+domain)
cls = getClassFromList(domain)
fixedurl = fixedurl.replace("http://www.","http://")
if not cls:
logging.debug("trying site:www."+domain)
logger.debug("trying site:www."+domain)
cls = getClassFromList("www."+domain)
fixedurl = fixedurl.replace("http://","http://www.")

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -41,7 +42,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -65,7 +66,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -204,7 +205,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -30,6 +31,8 @@ def getClass():
return ArchiveOfOurOwnOrgAdapter
logger = logging.getLogger(__name__)
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@ -48,13 +51,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# get storyId from url--url validation guarantees query correct
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
else:
@ -104,14 +107,14 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('" /></div>')[0]
loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions'
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
params['user_session[login]']))
d = self._postUrl(loginUrl, params)
#logging.info(d)
#logger.info(d)
if "Successfully logged in" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['user_session[login]']))
raise exceptions.FailedToLogin(url,params['user_session[login]'])
return False
@ -128,8 +131,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
metaurl = self.url+addurl
url = self.url+'/navigate'+addurl
logging.info("url: "+url)
logging.info("metaurl: "+metaurl)
logger.info("url: "+url)
logger.info("metaurl: "+metaurl)
try:
data = self._fetchUrl(url)
@ -175,7 +178,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# Find the chapters:
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))
self.story.setMetadata('numChapters',len(chapters))
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
for x in range(0,len(chapters)):
# just in case there's tags, like <i> in chapter titles.
chapter=chapters[x]
@ -292,7 +295,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
chapter=bs.BeautifulSoup('<div class="story"></div>')
data = self._fetchUrl(url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
@ -78,7 +79,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -151,7 +152,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
rating.find('br').replaceWith('split')
rating=rating.text.replace("This story is rated",'').split('split')[0]
self.story.setMetadata('rating',rating)
logging.debug(self.story.getMetadata('rating'))
logger.debug(self.story.getMetadata('rating'))
warnings=box.find('ol')
if warnings != None:
@ -177,7 +178,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -94,13 +95,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Logout" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -112,7 +113,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -237,7 +238,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -69,7 +70,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -115,13 +116,13 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -148,7 +149,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -182,7 +183,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -320,7 +321,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -71,7 +72,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -117,13 +118,13 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/fanfic/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -145,7 +146,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -293,7 +294,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -86,7 +87,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -221,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/story.php?story='+self.story.getMetadata('storyId'))
@ -95,7 +96,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
e = self._fetchUrl(url)
if "Welcome back," not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['name']))
raise exceptions.FailedToLogin(url,params['name'])
return False
@ -112,7 +113,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -222,7 +223,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -98,13 +99,13 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/elysian/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "User Account Page" not in d : #Member Account
logging.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
logger.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
else:
@ -115,7 +116,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -261,7 +262,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -86,7 +87,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -106,7 +107,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -227,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DokugaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# www.dokuga.com has two 'sections', shown in URL as
# 'fanfiction' and 'spark' that change how things should be
@ -87,7 +88,7 @@ class DokugaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -224,7 +225,7 @@ class DokugaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -282,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class DramioneOrgAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -283,7 +284,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -86,7 +87,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -106,7 +107,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -239,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from urllib import unquote_plus
@ -75,12 +76,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url = self.origurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
data = self._fetchUrl(url)
#logging.debug("\n===================\n%s\n===================\n"%data)
#logger.debug("\n===================\n%s\n===================\n"%data)
soup = bs.BeautifulSoup(data)
except urllib2.HTTPError, e:
if e.code == 404:
@ -108,11 +109,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
tryurl = "http://%s/s/%s/%d/"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount+1)
logging.debug('=Trying newer chapter: %s' % tryurl)
logger.debug('=Trying newer chapter: %s' % tryurl)
newdata = self._fetchUrl(tryurl)
if "not found. Please check to see you are not using an outdated url." \
not in newdata:
logging.debug('=======Found newer chapter: %s' % tryurl)
logger.debug('=======Found newer chapter: %s' % tryurl)
soup = bs.BeautifulSoup(newdata)
except:
pass
@ -160,7 +161,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
metatext = stripHTML(gui_table1i.find('div', {'style':'color:gray;'})).replace('Hurt/Comfort','Hurt-Comfort')
metalist = metatext.split(" - ")
#logging.debug("metatext:(%s)"%metalist)
#logger.debug("metatext:(%s)"%metalist)
# Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
@ -176,9 +177,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
goodgenres=True
for g in genrelist:
#logging.debug("g:(%s)"%g)
#logger.debug("g:(%s)"%g)
if g.strip() not in ffnetgenres:
logging.info("g not in ffnetgenres")
logger.info("g not in ffnetgenres")
goodgenres=False
if goodgenres:
self.story.extendList('genre',genrelist)
@ -240,7 +241,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
return
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffnet(and, I assume, fpcom) tends to fail
## more if hit too fast. This is in
## additional to what ever the
@ -265,7 +266,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
div = soup.find('div', {'id' : 'storytextp'})
if None == div:
logging.debug('div id=storytextp not found. data:%s'%data)
logger.debug('div id=storytextp not found. data:%s'%data)
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,div)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -48,7 +49,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
@ -94,12 +95,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
params['submit'] = 'Login...'
loginUrl = 'https://ssl.fanfiktion.de/'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['nickname']))
d = self._postUrl(loginUrl,params)
if "Login erfolgreich" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['nickname']))
raise exceptions.FailedToLogin(url,params['nickname'])
return False
@ -110,7 +111,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -183,7 +184,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffde has "floodlock" protection
soup = bs.BeautifulSoup(self._fetchUrl(url),

View file

@ -18,6 +18,7 @@
import time
import datetime
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from .. import translit
@ -33,6 +34,8 @@ def getClass():
return FicBookNetAdapter
logger = logging.getLogger(__name__)
class FicBookNetAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@ -49,7 +52,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId'))
@ -75,7 +78,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
url=self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
except urllib2.HTTPError, e:
@ -95,14 +98,14 @@ class FicBookNetAdapter(BaseSiteAdapter):
## Title
a = soup.find('h1')
self.story.setMetadata('title',stripHTML(a))
logging.debug("Title: (%s)"%self.story.getMetadata('title'))
logger.debug("Title: (%s)"%self.story.getMetadata('title'))
# Find authorid and URL from... author url.
a = table.find('a')
self.story.setMetadata('authorId',a.text) # Author's name is unique
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.text)
logging.debug("Author: (%s)"%self.story.getMetadata('author'))
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
# Find the chapters:
chapters = soup.find('div', {'class' : 'part_list'})
@ -123,7 +126,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span')))
update=pubdate
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
if not ',' in pubdate:
pubdate=datetime.date.today().strftime(self.dateformat)
@ -207,7 +210,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -44,7 +45,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
if m:
self.story.setMetadata('authorId',m.group('auth'))
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL(url)
else:
@ -68,7 +69,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
if self.is_adult or self.getConfig("is_adult"):
params={'iamold':'Yes',
'action':'ageanswer'}
logging.info("Attempting to get cookie for %s" % url)
logger.info("Attempting to get cookie for %s" % url)
## posting on list doesn't work, but doesn't hurt, either.
data = self._postUrl(url,params)
else:
@ -79,7 +80,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
## could be either chapter list page or one-shot text page.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._postFetchWithIAmOld(url)
@ -107,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
self._setURL(storya['href'])
url=self.url
logging.debug("Normalizing to URL: "+url)
logger.debug("Normalizing to URL: "+url)
## title's right there...
self.story.setMetadata('title',storya.string)
data = self._fetchUrl(url)
@ -193,7 +194,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# find <!-- headerend --> & <!-- footerstart --> and

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time
@ -61,12 +62,12 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
params['password'] = self.getConfig("password")
loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['username']))
d = self._postUrl(loginUrl,params)
if "Login attempt failed..." in d:
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['username']))
raise exceptions.FailedToLogin(url,params['username'])
return False
@ -79,7 +80,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
@ -96,7 +97,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# normalize story URL on chapter list.
self.story.setMetadata('storyId',storya['href'].split('/',)[2])
url = "http://"+self.getSiteDomain()+storya['href']
logging.debug("Normalizing to URL: "+url)
logger.debug("Normalizing to URL: "+url)
self._setURL(url)
try:
soup = bs.BeautifulSoup(self._fetchUrl(url))
@ -201,7 +202,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import cookielib as cl
@ -175,7 +176,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})
if soup == None:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -49,7 +50,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0])
if 'storyInfo' in self.story.getMetadata('storyId'):
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/s/storyInfo.php?id='+self.story.getMetadata('storyId'))
@ -95,13 +96,13 @@ class FineStoriesComAdapter(BaseSiteAdapter):
params['submit'] = 'Login'
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['theusername']))
d = self._fetchUrl(loginUrl, params)
if "My Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['theusername']))
raise exceptions.FailedToLogin(url,params['theusername'])
return False
@ -114,7 +115,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -232,7 +233,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.section=self.parsedUrl.path.split('/',)[1]
# normalized story URL.
@ -104,13 +105,13 @@ class GrangerEnchantedCom(BaseSiteAdapter):
loginUrl = 'http://grangerenchanted.com/enchant/user.php?action=login'
else:
loginUrl = 'http://malfoymanor.grangerenchanted.com/themanor/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -132,7 +133,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -157,7 +158,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -284,7 +285,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -41,7 +42,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only psid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))
@ -72,7 +73,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -181,7 +182,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
## most adapters use BeautifulStoneSoup here, but non-Stone
## allows nested div tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -215,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /eff part. Replace all to remove it usually.
@ -79,7 +80,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -208,7 +209,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# There's no good wrapper around the chapter text. :-/

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -202,7 +203,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class IkEternalNetAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -158,7 +159,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -267,7 +268,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -99,7 +100,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -132,7 +133,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -294,7 +295,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -84,7 +85,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -104,7 +105,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -234,7 +235,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -86,7 +87,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -221,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -42,7 +43,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId'))
else:
@ -66,7 +67,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -188,7 +189,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data=self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,
@ -214,7 +215,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
return self.utf8FromSoup(url,anchor)
else:
logging.debug('Using kludgey text find for older mediaminer story.')
logger.debug('Using kludgey text find for older mediaminer story.')
## Some older mediaminer stories are unparsable with BeautifulSoup.
## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first.
## Story stuff falls between:

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -277,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -95,7 +96,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -128,7 +129,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -269,7 +270,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -96,13 +97,13 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login&sid='+self.story.getMetadata('storyId')
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -125,7 +126,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -164,7 +165,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -315,7 +316,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -177,7 +178,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -184,7 +185,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL("http://"+self.getSiteDomain()\
@ -78,7 +79,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -187,7 +188,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -99,7 +100,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -132,7 +133,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -273,7 +274,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -74,7 +75,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -197,7 +198,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
@ -94,13 +95,13 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Logout" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -112,7 +113,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -245,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = data.replace('<div align="left"', '<div align="left">')

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -97,13 +98,13 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -125,7 +126,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -253,7 +254,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2, urllib, cookielib
@ -47,7 +48,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/')
@ -90,12 +91,12 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
params['login'] = 'Login'
loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['txtusername']))
d = self._fetchUrl(loginUrl, params)
if 'Please login to continue.' in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['txtusername']))
raise exceptions.FailedToLogin(url,params['txtusername'])
return False
@ -108,7 +109,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -208,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,8 +48,8 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logging.info(self.parsedUrl.netloc)
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.info(self.parsedUrl.netloc)
# normalized story URL.
if "explicit" in self.parsedUrl.netloc:
self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -91,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -112,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -234,7 +235,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import cookielib as cl
@ -54,7 +55,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/story/'+self.story.getMetadata('storyId'))
@ -81,7 +82,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# portkey screws around with using a different URL to set the
# cookie and it's a pain. So... cheat!
@ -247,7 +248,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -40,7 +41,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -63,7 +64,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -191,7 +192,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -89,7 +90,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -109,7 +110,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -237,7 +238,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -202,7 +203,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -86,7 +87,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -106,7 +107,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -248,7 +249,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -216,7 +217,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -94,7 +95,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -114,7 +115,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -231,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.section=self.parsedUrl.path.split('/',)[1]
# normalized story URL.
@ -95,7 +96,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -115,7 +116,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -242,7 +243,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -85,7 +86,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -105,7 +106,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -235,7 +236,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -82,7 +83,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# sites skip that for one-chapter stories.
# Except it doesn't this time. :-/
url = self.url #+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -224,7 +225,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
# soup = bs.BeautifulSoup(self._fetchUrl(url))
# BeautifulSoup objects to <p> inside <span>, which

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -58,7 +59,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -90,7 +91,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -223,7 +224,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -213,7 +214,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -133,7 +134,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class SVUFictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -258,7 +259,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -43,7 +44,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -81,13 +82,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -102,7 +103,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -116,7 +117,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
# need to log in for this one.
addurl = "&ageconsent=ok&warning=4"
url = self.url+'&index=1'+addurl
logging.debug("Changing URL: "+url)
logger.debug("Changing URL: "+url)
self.performLogin(url)
data = self._fetchUrl(url)
@ -229,7 +230,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -18,6 +18,7 @@
import datetime
import time
import logging
logger = logging.getLogger(__name__)
from .. import BeautifulSoup as bs
from .. import exceptions
@ -48,7 +49,7 @@ class TestSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")):
logging.warn("self.is_adult:%s"%self.is_adult)
logger.warn("self.is_adult:%s"%self.is_adult)
raise exceptions.AdultCheckRequired(self.url)
if self.story.getMetadata('storyId') == '666':
@ -184,7 +185,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
if self.story.getMetadata('storyId') == '667':
raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -76,7 +77,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -198,7 +199,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -80,7 +81,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -182,7 +183,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -71,7 +72,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@ -117,13 +118,13 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -145,7 +146,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -292,7 +293,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
if self.parsedUrl.path.split('/',)[1] == 'wiktt':
self.story.addToList("category","Harry Potter")
@ -99,13 +100,13 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -127,7 +128,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -152,7 +153,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -258,7 +259,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1')
@ -91,13 +92,13 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "My Account Page" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -110,7 +111,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -223,7 +224,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -51,7 +52,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the part. Replace all to remove it usually.
@ -101,13 +102,13 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -129,7 +130,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -272,7 +273,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -43,7 +44,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -81,13 +82,13 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -102,7 +103,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -235,7 +236,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import time
@ -40,7 +41,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL("http://"+self.getSiteDomain()\
+"/Story-"+self.story.getMetadata('storyId'))
@ -81,7 +82,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
return
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['urealname']))
## need to pull empty login page first to get ctkn and
@ -98,7 +99,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
d = self._fetchUrl(loginUrl, params)
if "Stories Published" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -110,7 +111,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url=self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# tth won't send you future updates if you aren't 'caught up'
# on the story. Login isn't required for F21, but logging in will
@ -137,7 +138,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
form = soup.find('form', {'id':'sitemaxratingform'})
params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
'sitemaxrating':'5'}
logging.info("Attempting to get rating cookie for %s" % url)
logger.info("Attempting to get rating cookie for %s" % url)
data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
# refetch story page.
data = self._fetchUrl(url)
@ -158,7 +159,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from author list page.
infourl = 'http://'+self.host+ainfo['href']
logging.debug("**StoryInfo** URL: "+infourl)
logger.debug("**StoryInfo** URL: "+infourl)
infodata = self._fetchUrl(infourl)
infosoup = bs.BeautifulSoup(infodata)
@ -175,14 +176,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from *primary* author list page.
logging.debug("**AUTHOR** URL: "+authorurl)
logger.debug("**AUTHOR** URL: "+authorurl)
authordata = self._fetchUrl(authorurl)
descurl=authorurl
authorsoup = bs.BeautifulSoup(authordata)
# author can have several pages, scan until we find it.
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
logging.debug("**AUTHOR** nextpage URL: "+nextpage)
logger.debug("**AUTHOR** nextpage URL: "+nextpage)
authordata = self._fetchUrl(nextpage)
descurl=nextpage
authorsoup = bs.BeautifulSoup(authordata)
@ -259,7 +260,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url))
div = soup.find('div', {'id' : 'storyinnerbody'})

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL. http://www.twilightarchives.com/read/9353
self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId'))
@ -77,7 +78,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -172,7 +173,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -42,7 +43,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -83,13 +84,13 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -99,7 +100,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -225,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@ -43,7 +44,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -84,13 +85,13 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -109,7 +110,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -252,7 +253,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -45,7 +46,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -84,7 +85,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -215,7 +216,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -56,7 +57,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# - get chapter list, if not one-shot.
url = self.url+'&chapter=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
@ -69,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# pull title(title) and author from the HTML title.
title = soup.find('title').string
logging.debug('Title: %s' % title)
logger.debug('Title: %s' % title)
title = title.split('::')[1].strip()
self.story.setMetadata('title',title.split(' by ')[0].strip())
self.story.setMetadata('author',title.split(' by ')[1].strip())
@ -109,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font>
# </td></tr></table>
logging.debug("Author URL: "+self.story.getMetadata('authorUrl'))
logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
selfClosingTags=('br')) # normalize <br> tags to <br />
# find this story in the list, parse it's metadata based on
@ -212,7 +213,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,13 +93,13 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@ -120,7 +121,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -145,7 +146,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -286,7 +287,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -77,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -203,7 +204,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -48,7 +49,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -86,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -209,7 +210,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url))

View file

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -54,7 +55,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -92,7 +93,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -126,7 +127,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# explicitly put ageconsent because google appengine regexp doesn't include it for some reason.
addurl = addurl.replace("&amp;","&")+'&ageconsent=ok'
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@ -147,7 +148,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# while len(loopdata) > 0:
# if len(loopdata) < 5000:
# chklen = len(loopdata)
# logging.info("loopdata: %s" % loopdata[:chklen])
# logger.info("loopdata: %s" % loopdata[:chklen])
# loopdata = loopdata[chklen:]
# use BeautifulSoup HTML parser to make everything easier to find.
@ -270,7 +271,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

View file

@ -27,6 +27,8 @@ from functools import partial
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
logger = logging.getLogger(__name__)
try:
from google.appengine.api import apiproxy_stub_map
def urlfetch_timeout_hook(service, call, request, response):
@ -38,10 +40,10 @@ try:
apiproxy_stub_map.apiproxy.GetPreCallHooks().Append(
'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch')
logging.info("Hook to make default deadline 10.0 installed.")
logger.info("Hook to make default deadline 10.0 installed.")
except:
pass
#logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
#logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
from ..story import Story
from ..gziphttp import GZipProcessor
@ -125,7 +127,7 @@ class BaseSiteAdapter(Configurable):
#print code
if code == "auto":
if not chardet:
logging.info("chardet not available, skipping 'auto' encoding")
logger.info("chardet not available, skipping 'auto' encoding")
continue
detected = chardet.detect(data)
#print detected
@ -133,12 +135,11 @@ class BaseSiteAdapter(Configurable):
code=detected['encoding']
else:
continue
logging.debug("try code:"+code)
return data.decode(code)
except:
logging.debug("code failed:"+code)
logger.debug("code failed:"+code)
pass
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
@ -175,10 +176,10 @@ class BaseSiteAdapter(Configurable):
return self._decode(self._fetchUrlRaw(url,parameters))
except Exception, e:
excpt=e
logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
logging.error("Giving up on %s" %url)
logging.exception(excpt)
logger.error("Giving up on %s" %url)
logger.exception(excpt)
raise(excpt)
# Limit chapters to download. Input starts at 1, list starts at 0
@ -356,7 +357,6 @@ class BaseSiteAdapter(Configurable):
def cachedfetch(realfetch,cache,url):
if url in cache:
print("cache hit")
return cache[url]
else:
return realfetch(url)

View file

@ -27,6 +27,8 @@ import logging
from ..configurable import Configurable
from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML
logger = logging.getLogger(__name__)
class BaseStoryWriter(Configurable):
@staticmethod
@ -137,11 +139,11 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
logger.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
@ -202,7 +204,7 @@ class BaseStoryWriter(Configurable):
if not outstream:
close=True
logging.info("Save directly to file: %s" % outfilename)
logger.info("Save directly to file: %s" % outfilename)
if self.getConfig('make_directories'):
path=""
outputdirs = os.path.dirname(outfilename).split('/')
@ -229,7 +231,7 @@ class BaseStoryWriter(Configurable):
outstream = open(outfilename,"wb")
else:
close=False
logging.debug("Save to stream")
logger.debug("Save to stream")
if not metaonly:
self.story = self.adapter.getStory() # get full story now,
@@ -256,7 +258,7 @@ class BaseStoryWriter(Configurable):
outstream.close()
def writeFile(self, filename, data):
logging.debug("writeFile:%s"%filename)
logger.debug("writeFile:%s"%filename)
if self.getConfig('zip_output'):
outputdirs = os.path.dirname(self.getBaseFileName())

View file

@@ -29,6 +29,8 @@ from xml.dom.minidom import parse, parseString, getDOMImplementation
from base_writer import *
from ..htmlcleanup import stripHTML
logger = logging.getLogger(__name__)
class EpubWriter(BaseStoryWriter):
@staticmethod
@@ -205,8 +207,8 @@ div { margin: 0pt; padding: 0pt; }
# if there's a self.story.logfile, there's an existing log
# to add to.
if self.story.logfile:
logging.debug("existing logfile found, appending")
logging.debug("existing data:%s"%self._getLastLogData(self.story.logfile))
logger.debug("existing logfile found, appending")
logger.debug("existing data:%s"%self._getLastLogData(self.story.logfile))
replace_string = "</body>" # "</h3>"
self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string))
else:
@@ -260,11 +262,11 @@ div { margin: 0pt; padding: 0pt; }
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
elif entry in self.titleLabels:
logging.debug("Using fallback label for %s_label"%entry)
logger.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
retval = retval + ENTRY.substitute({'id':entry,
'label':label,
@@ -415,7 +417,7 @@ div { margin: 0pt; padding: 0pt; }
coverimgid = "image0000"
if not self.story.cover and self.story.oldcover:
print("writer_epub: no new cover, has old cover, write image.")
logger.debug("writer_epub: no new cover, has old cover, write image.")
(oldcoverhtmlhref,
oldcoverhtmltype,
oldcoverhtmldata,
@@ -646,7 +648,7 @@ div { margin: 0pt; padding: 0pt; }
for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
logger.debug('Writing chapter text for: %s' % title)
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
# ffnet(& maybe others) gives the whole chapter text