Mirror of https://github.com/JimmXinu/FanFicFare.git (synced 2025-12-30 12:42:30 +01:00)
Redo logging so it's more consistent across web/CLI/PI.
parent 4d96632b67
commit a0f16425d2
84 changed files with 463 additions and 365 deletions
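The pattern applied throughout this commit: instead of calling the root logger's module-level functions (logging.debug(...)), each module creates one named logger at import time and logs through it, so the embedding application (web service, CLI, or calibre plugin) decides handlers and levels. A minimal sketch of the pattern, not code from the commit:

    import logging

    # one named logger per module, created once at import time
    logger = logging.getLogger(__name__)

    def fetch(url):
        # records carry the module name and obey whatever handlers/levels
        # the embedding application configured; nothing is hard-coded here
        logger.debug("trying url:%s", url)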
@@ -14,7 +14,6 @@ from StringIO import StringIO
 from calibre.utils.ipc.server import Server
 from calibre.utils.ipc.job import ParallelJob
-from calibre.utils.logging import Log
 
 from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
                     OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
 
@@ -109,9 +108,6 @@ def do_download_for_worker(book,options):
     when run as a worker job
     '''
     try:
-        # import logging
-        # logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
-
         book['comment'] = 'Download started...'
 
         configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
@@ -16,6 +16,11 @@
 #
 
 import logging
+# suppresses default logger. Logging is setup in fanficdownload/__init__.py so it works in calibre, too.
+logger = logging.getLogger()
+loghandler=logging.NullHandler()
+loghandler.setFormatter(logging.Formatter("(=====)%(levelname)s:%(message)s"))
+logger.addHandler(loghandler)
 
 import sys, os
 from os.path import normpath, expanduser, isfile, join
 
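The NullHandler exists only to mark the logger as handled: records emitted before the application configures logging are discarded quietly instead of triggering Python 2's one-time "No handlers could be found for logger ..." complaint on stderr (logging.NullHandler needs Python 2.7+). A self-contained illustration of that behavior, with an illustrative logger name:

    import logging

    log = logging.getLogger("demo")        # illustrative name, not from the commit
    log.addHandler(logging.NullHandler())  # counts as a handler, emits nothing
    log.warning("dropped quietly")         # no output, and no "No handlers
                                           # could be found" warning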
@@ -79,11 +84,9 @@ def main():
 
     (options, args) = parser.parse_args()
 
+    if options.debug:
+        logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
+    else:
+        logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
-    if not options.debug:
-        logger = logging.getLogger("fanficdownloader")
-        logger.setLevel(logging.INFO)
 
     if len(args) != 1:
         parser.error("incorrect number of arguments")
@@ -120,8 +123,6 @@ def main():
     logging.debug('reading %s config file(s), if present'%conflist)
     configuration.read(conflist)
 
-    print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))
-
     try:
         configuration.add_section("overrides")
     except ConfigParser.DuplicateSectionError:
@@ -1 +1,15 @@
 # -*- coding: utf-8 -*-
+
+try:
+    # just a way to switch between web service and CLI/PI
+    import google.appengine.api
+except:
+
+    import logging
+    logger = logging.getLogger(__name__)
+    loghandler=logging.StreamHandler()
+    loghandler.setFormatter(logging.Formatter("FFDL:%(levelname)s:%(filename)s(%(lineno)d):%(message)s"))
+    logger.addHandler(loghandler)
+    loghandler.setLevel(logging.DEBUG)
+    logger.setLevel(logging.DEBUG)
+
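The import probe is the environment switch: importing google.appengine.api only succeeds inside the App Engine web service, which configures logging itself; everywhere else (CLI/plugin) the except branch attaches the package's own StreamHandler. A hedged sketch of the same probe technique (the ON_APPENGINE flag is illustrative, not in the commit):

    try:
        import google.appengine.api   # only importable on App Engine
        ON_APPENGINE = True           # illustrative flag
    except ImportError:               # the commit uses a bare except:
        ON_APPENGINE = False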
@@ -20,6 +20,8 @@ from os.path import dirname, basename, normpath
 import logging
 import urlparse as up
 
+logger = logging.getLogger(__name__)
+
 from .. import exceptions as exceptions
 
 ## must import each adapter here.
@@ -121,9 +123,9 @@ for x in imports():
 
 def getAdapter(config,url):
 
-    logging.debug("trying url:"+url)
+    logger.debug("trying url:"+url)
     (cls,fixedurl) = getClassFor(url)
-    logging.debug("fixedurl:"+fixedurl)
+    logger.debug("fixedurl:"+fixedurl)
     if cls:
         adapter = cls(config,fixedurl) # raises InvalidStoryURL
         return adapter
@@ -161,11 +163,11 @@ def getClassFor(url):
     cls = getClassFromList(domain)
     if not cls and domain.startswith("www."):
         domain = domain.replace("www.","")
-        logging.debug("trying site:without www: "+domain)
+        logger.debug("trying site:without www: "+domain)
         cls = getClassFromList(domain)
         fixedurl = fixedurl.replace("http://www.","http://")
     if not cls:
-        logging.debug("trying site:www."+domain)
+        logger.debug("trying site:www."+domain)
         cls = getClassFromList("www."+domain)
         fixedurl = fixedurl.replace("http://","http://www.")
 
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2
@@ -41,7 +42,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -65,7 +66,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
             addurl=""
 
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -204,7 +205,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
 
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         data = self._fetchUrl(url)
         # problems with some stories, but only in calibre. I suspect
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -30,6 +31,8 @@ def getClass():
     return ArchiveOfOurOwnOrgAdapter
 
+
+logger = logging.getLogger(__name__)
 
 class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
 
     def __init__(self, config, url):
@@ -48,13 +51,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # get storyId from url--url validation guarantees query correct
         m = re.match(self.getSiteURLPattern(),url)
         if m:
             self.story.setMetadata('storyId',m.group('id'))
-            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
             # normalized story URL.
             self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
         else:
@@ -104,14 +107,14 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
         params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('" /></div>')[0]
 
         loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions'
-        logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                              params['user_session[login]']))
 
         d = self._postUrl(loginUrl, params)
-        #logging.info(d)
+        #logger.info(d)
 
         if "Successfully logged in" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['user_session[login]']))
             raise exceptions.FailedToLogin(url,params['user_session[login]'])
             return False
@@ -128,8 +131,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
 
         metaurl = self.url+addurl
         url = self.url+'/navigate'+addurl
-        logging.info("url: "+url)
-        logging.info("metaurl: "+metaurl)
+        logger.info("url: "+url)
+        logger.info("metaurl: "+metaurl)
 
         try:
             data = self._fetchUrl(url)
@@ -175,7 +178,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
         # Find the chapters:
         chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))
         self.story.setMetadata('numChapters',len(chapters))
-        logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
+        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
         for x in range(0,len(chapters)):
             # just in case there's tags, like <i> in chapter titles.
             chapter=chapters[x]
@@ -292,7 +295,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
 
     # grab the text for an individual chapter.
     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         chapter=bs.BeautifulSoup('<div class="story"></div>')
         data = self._fetchUrl(url)
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -48,7 +49,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
@@ -78,7 +79,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
     def extractChapterUrlsAndMetadata(self):
 
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -151,7 +152,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
         rating.find('br').replaceWith('split')
         rating=rating.text.replace("This story is rated",'').split('split')[0]
         self.story.setMetadata('rating',rating)
-        logging.debug(self.story.getMetadata('rating'))
+        logger.debug(self.story.getMetadata('rating'))
 
         warnings=box.find('ol')
         if warnings != None:
@@ -177,7 +178,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
 
         # normalized story URL.
@@ -94,13 +95,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Logout" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -112,7 +113,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -237,7 +238,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
    def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         data = self._fetchUrl(url)
 
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -69,7 +70,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@@ -115,13 +116,13 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -148,7 +149,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -182,7 +183,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
             # correct stupid & error in url.
             addurl = addurl.replace("&amp;","&")
             url = self.url+'&index=1'+addurl
-            logging.debug("URL 2nd try: "+url)
+            logger.debug("URL 2nd try: "+url)
 
             try:
                 data = self._fetchUrl(url)
@@ -320,7 +321,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -71,7 +72,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@@ -117,13 +118,13 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/fanfic/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -145,7 +146,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -293,7 +294,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
 
         # normalized story URL.
@@ -86,7 +87,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -221,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         self._setURL('http://' + self.getSiteDomain() + '/story.php?story='+self.story.getMetadata('storyId'))
 
@@ -95,7 +96,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
         e = self._fetchUrl(url)
 
         if "Welcome back," not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['name']))
             raise exceptions.FailedToLogin(url,params['name'])
             return False
@@ -112,7 +113,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -222,7 +223,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulSoup(self._fetchUrl(url),
                                 selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
 
         # normalized story URL.
@@ -98,13 +99,13 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/elysian/user.php'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "User Account Page" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
+            logger.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
         else:
@@ -115,7 +116,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -261,7 +262,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -86,7 +87,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -106,7 +107,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
             # correct stupid & error in url.
             addurl = addurl.replace("&amp;","&")
             url = self.url+'&index=1'+addurl
-            logging.debug("URL 2nd try: "+url)
+            logger.debug("URL 2nd try: "+url)
 
             try:
                 data = self._fetchUrl(url)
@@ -227,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class DokugaComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # www.dokuga.com has two 'sections', shown in URL as
         # 'fanfiction' and 'spark' that change how things should be
@@ -87,7 +88,7 @@ class DokugaComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -224,7 +225,7 @@ class DokugaComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -92,13 +93,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -120,7 +121,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -145,7 +146,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
             # correct stupid & error in url.
             addurl = addurl.replace("&amp;","&")
             url = self.url+'&index=1'+addurl
-            logging.debug("URL 2nd try: "+url)
+            logger.debug("URL 2nd try: "+url)
 
             try:
                 data = self._fetchUrl(url)
@@ -282,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -92,13 +93,13 @@ class DramioneOrgAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -120,7 +121,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -283,7 +284,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
 
         # normalized story URL.
@@ -86,7 +87,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -106,7 +107,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
             # correct stupid & error in url.
             addurl = addurl.replace("&amp;","&")
             url = self.url+'&index=1'+addurl
-            logging.debug("URL 2nd try: "+url)
+            logger.debug("URL 2nd try: "+url)
 
             try:
                 data = self._fetchUrl(url)
@@ -239,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 from urllib import unquote_plus
@@ -75,12 +76,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
         # metadata and chapter list
 
         url = self.origurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         # use BeautifulSoup HTML parser to make everything easier to find.
         try:
             data = self._fetchUrl(url)
-            #logging.debug("\n===================\n%s\n===================\n"%data)
+            #logger.debug("\n===================\n%s\n===================\n"%data)
             soup = bs.BeautifulSoup(data)
         except urllib2.HTTPError, e:
             if e.code == 404:
@@ -108,11 +109,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
                 tryurl = "http://%s/s/%s/%d/"%(self.getSiteDomain(),
                                                self.story.getMetadata('storyId'),
                                                chapcount+1)
-                logging.debug('=Trying newer chapter: %s' % tryurl)
+                logger.debug('=Trying newer chapter: %s' % tryurl)
                 newdata = self._fetchUrl(tryurl)
                 if "not found. Please check to see you are not using an outdated url." \
                         not in newdata:
-                    logging.debug('=======Found newer chapter: %s' % tryurl)
+                    logger.debug('=======Found newer chapter: %s' % tryurl)
                     soup = bs.BeautifulSoup(newdata)
         except:
             pass
@@ -160,7 +161,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 
         metatext = stripHTML(gui_table1i.find('div', {'style':'color:gray;'})).replace('Hurt/Comfort','Hurt-Comfort')
         metalist = metatext.split(" - ")
-        #logging.debug("metatext:(%s)"%metalist)
+        #logger.debug("metatext:(%s)"%metalist)
 
         # Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
 
@@ -176,9 +177,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
             genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
             goodgenres=True
             for g in genrelist:
-                #logging.debug("g:(%s)"%g)
+                #logger.debug("g:(%s)"%g)
                 if g.strip() not in ffnetgenres:
-                    logging.info("g not in ffnetgenres")
+                    logger.info("g not in ffnetgenres")
                     goodgenres=False
             if goodgenres:
                 self.story.extendList('genre',genrelist)
@@ -240,7 +241,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
         return
 
     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
         time.sleep(0.5) ## ffnet(and, I assume, fpcom) tends to fail
                         ## more if hit too fast. This is in
                         ## additional to what ever the
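The half-second sleep above is plain client-side throttling: fanfiction.net starts refusing requests that arrive too quickly, so every chapter fetch pays a fixed delay up front, on top of any retry logic elsewhere. A sketch of the same idea with the delay factored out (names are illustrative, not from the commit):

    import time

    FETCH_DELAY = 0.5  # illustrative constant; the commit hard-codes 0.5s

    def polite_fetch(fetch, url):
        # spread bursts out by paying the delay before every request
        time.sleep(FETCH_DELAY)
        return fetch(url)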
@@ -265,7 +266,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
         div = soup.find('div', {'id' : 'storytextp'})
 
         if None == div:
-            logging.debug('div id=storytextp not found. data:%s'%data)
+            logger.debug('div id=storytextp not found. data:%s'%data)
             raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
 
         return self.utf8FromSoup(url,div)
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2
@@ -48,7 +49,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
@@ -94,12 +95,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
         params['submit'] = 'Login...'
 
         loginUrl = 'https://ssl.fanfiktion.de/'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['nickname']))
         d = self._postUrl(loginUrl,params)
 
         if "Login erfolgreich" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['nickname']))
             raise exceptions.FailedToLogin(url,params['nickname'])
             return False
@@ -110,7 +111,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
     def extractChapterUrlsAndMetadata(self):
 
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -183,7 +184,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
         time.sleep(0.5) ## ffde has "floodlock" protection
 
         soup = bs.BeautifulSoup(self._fetchUrl(url),
@@ -18,6 +18,7 @@
 import time
 import datetime
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 from .. import translit
@@ -33,6 +34,8 @@ def getClass():
     return FicBookNetAdapter
 
+
+logger = logging.getLogger(__name__)
 
 class FicBookNetAdapter(BaseSiteAdapter):
 
     def __init__(self, config, url):
@@ -49,7 +52,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId'))
@@ -75,7 +78,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
     ## Getting the chapter list and the meta data, plus 'is adult' checking.
     def extractChapterUrlsAndMetadata(self):
         url=self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
         try:
             data = self._fetchUrl(url)
         except urllib2.HTTPError, e:
@@ -95,14 +98,14 @@ class FicBookNetAdapter(BaseSiteAdapter):
         ## Title
         a = soup.find('h1')
         self.story.setMetadata('title',stripHTML(a))
-        logging.debug("Title: (%s)"%self.story.getMetadata('title'))
+        logger.debug("Title: (%s)"%self.story.getMetadata('title'))
 
         # Find authorid and URL from... author url.
         a = table.find('a')
         self.story.setMetadata('authorId',a.text) # Author's name is unique
         self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
         self.story.setMetadata('author',a.text)
-        logging.debug("Author: (%s)"%self.story.getMetadata('author'))
+        logger.debug("Author: (%s)"%self.story.getMetadata('author'))
 
         # Find the chapters:
         chapters = soup.find('div', {'class' : 'part_list'})
@@ -123,7 +126,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
             pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span')))
             update=pubdate
 
-        logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
+        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
 
         if not ',' in pubdate:
             pubdate=datetime.date.today().strftime(self.dateformat)
@@ -207,7 +210,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
    # grab the text for an individual chapter.
    def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2
@@ -44,7 +45,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
         if m:
             self.story.setMetadata('authorId',m.group('auth'))
             self.story.setMetadata('storyId',m.group('id'))
-            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
             # normalized story URL.
             self._setURL(url)
         else:
@@ -68,7 +69,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
         if self.is_adult or self.getConfig("is_adult"):
             params={'iamold':'Yes',
                     'action':'ageanswer'}
-            logging.info("Attempting to get cookie for %s" % url)
+            logger.info("Attempting to get cookie for %s" % url)
             ## posting on list doesn't work, but doesn't hurt, either.
             data = self._postUrl(url,params)
         else:
@@ -79,7 +80,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
 
         ## could be either chapter list page or one-shot text page.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._postFetchWithIAmOld(url)
@@ -107,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
             storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
             self._setURL(storya['href'])
             url=self.url
-            logging.debug("Normalizing to URL: "+url)
+            logger.debug("Normalizing to URL: "+url)
             ## title's right there...
             self.story.setMetadata('title',storya.string)
             data = self._fetchUrl(url)
@@ -193,7 +194,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
 
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         data = self._fetchUrl(url)
         # find <!-- headerend --> & <!-- footerstart --> and
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 import time
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 import time
@@ -61,12 +62,12 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
         params['password'] = self.getConfig("password")
 
         loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['username']))
         d = self._postUrl(loginUrl,params)
 
         if "Login attempt failed..." in d:
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['username']))
             raise exceptions.FailedToLogin(url,params['username'])
             return False
@@ -79,7 +80,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
         # metadata and chapter list
 
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         # use BeautifulSoup HTML parser to make everything easier to find.
         try:
@@ -96,7 +97,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
             # normalize story URL on chapter list.
             self.story.setMetadata('storyId',storya['href'].split('/',)[2])
             url = "http://"+self.getSiteDomain()+storya['href']
-            logging.debug("Normalizing to URL: "+url)
+            logger.debug("Normalizing to URL: "+url)
             self._setURL(url)
             try:
                 soup = bs.BeautifulSoup(self._fetchUrl(url))
@@ -201,7 +202,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
 
 
     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
 
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 import cookielib as cl
@@ -175,7 +176,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
 
 
     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
         soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})
         if soup == None:
             raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -49,7 +50,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
         self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0])
         if 'storyInfo' in self.story.getMetadata('storyId'):
             self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/s/storyInfo.php?id='+self.story.getMetadata('storyId'))
@@ -95,13 +96,13 @@ class FineStoriesComAdapter(BaseSiteAdapter):
         params['submit'] = 'Login'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['theusername']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "My Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['theusername']))
             raise exceptions.FailedToLogin(url,params['theusername'])
             return False
@@ -114,7 +115,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -232,7 +233,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulSoup(self._fetchUrl(url),
                                 selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
         self.section=self.parsedUrl.path.split('/',)[1]
 
         # normalized story URL.
@@ -104,13 +105,13 @@ class GrangerEnchantedCom(BaseSiteAdapter):
             loginUrl = 'http://grangerenchanted.com/enchant/user.php?action=login'
         else:
             loginUrl = 'http://malfoymanor.grangerenchanted.com/themanor/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -132,7 +133,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -157,7 +158,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
             # correct stupid & error in url.
            addurl = addurl.replace("&amp;","&")
             url = self.url+'&index=1'+addurl
-            logging.debug("URL 2nd try: "+url)
+            logger.debug("URL 2nd try: "+url)
 
             try:
                 data = self._fetchUrl(url)
@@ -284,7 +285,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2
@@ -41,7 +42,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only psid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))
@@ -72,7 +73,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
     def extractChapterUrlsAndMetadata(self):
 
         url = self.url+'&index=1'
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -181,7 +182,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
 
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         ## most adapters use BeautifulStoneSoup here, but non-Stone
         ## allows nested div tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -76,7 +77,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -215,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -51,7 +52,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         # XXX Most sites don't have the /eff part. Replace all to remove it usually.
@@ -79,7 +80,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
     def extractChapterUrlsAndMetadata(self):
 
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -208,7 +209,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         data = self._fetchUrl(url)
         # There's no good wrapper around the chapter text. :-/
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -76,7 +77,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -202,7 +203,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
 
         soup = bs.BeautifulSoup(self._fetchUrl(url),
@@ -17,6 +17,7 @@
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -92,13 +93,13 @@ class IkEternalNetAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'
 
         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))
 
         d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False
@@ -120,7 +121,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
@@ -158,7 +159,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
             # correct stupid & error in url.
             addurl = addurl.replace("&amp;","&")
             url = self.url+'&index=1'+addurl
-            logging.debug("URL 2nd try: "+url)
+            logger.debug("URL 2nd try: "+url)
 
             try:
                 data = self._fetchUrl(url)
@@ -267,7 +268,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -51,7 +52,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.

@ -99,7 +100,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -132,7 +133,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -294,7 +295,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -48,7 +49,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -84,7 +85,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -104,7 +105,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -234,7 +235,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.

@ -86,7 +87,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -221,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2

@ -42,7 +43,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId'))
else:

@ -66,7 +67,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -188,7 +189,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data=self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,

@ -214,7 +215,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
return self.utf8FromSoup(url,anchor)
else:
logging.debug('Using kludgey text find for older mediaminer story.')
logger.debug('Using kludgey text find for older mediaminer story.')
## Some older mediaminer stories are unparsable with BeautifulSoup.
## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first.
## Story stuff falls between:

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -92,13 +93,13 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False

@ -120,7 +121,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -145,7 +146,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -277,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -51,7 +52,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.

@ -95,7 +96,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -128,7 +129,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -269,7 +270,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -51,7 +52,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -96,13 +97,13 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login&sid='+self.story.getMetadata('storyId')
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False

@ -125,7 +126,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -164,7 +165,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -315,7 +316,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))

@ -80,7 +81,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -177,7 +178,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))

@ -80,7 +81,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -184,7 +185,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL("http://"+self.getSiteDomain()\

@ -78,7 +79,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -187,7 +188,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -51,7 +52,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.

@ -99,7 +100,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -132,7 +133,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -273,7 +274,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.

@ -74,7 +75,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -197,7 +198,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.

@ -94,13 +95,13 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Logout" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False

@ -112,7 +113,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -245,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = data.replace('<div align="left"', '<div align="left">')

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -97,13 +98,13 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False

@ -125,7 +126,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -253,7 +254,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2, urllib, cookielib

@ -47,7 +48,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/')

@ -90,12 +91,12 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
params['login'] = 'Login'
loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['txtusername']))
d = self._fetchUrl(loginUrl, params)
if 'Please login to continue.' in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['txtusername']))
raise exceptions.FailedToLogin(url,params['txtusername'])
return False

@ -108,7 +109,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -208,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,8 +48,8 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logging.info(self.parsedUrl.netloc)
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.info(self.parsedUrl.netloc)
# normalized story URL.
if "explicit" in self.parsedUrl.netloc:
self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -91,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -112,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -234,7 +235,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import cookielib as cl

@ -54,7 +55,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/story/'+self.story.getMetadata('storyId'))

@ -81,7 +82,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
# portkey screws around with using a different URL to set the
# cookie and it's a pain. So... cheat!

@ -247,7 +248,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
soup = bs.BeautifulStoneSoup(data,
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2

@ -40,7 +41,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -63,7 +64,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -191,7 +192,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -89,7 +90,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -109,7 +110,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -237,7 +238,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -76,7 +77,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -202,7 +203,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -86,7 +87,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -106,7 +107,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -248,7 +249,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -80,7 +81,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -216,7 +217,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -94,7 +95,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -114,7 +115,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -231,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self.section=self.parsedUrl.path.split('/',)[1]
# normalized story URL.

@ -95,7 +96,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -115,7 +116,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -242,7 +243,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -85,7 +86,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -105,7 +106,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -235,7 +236,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -48,7 +49,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -82,7 +83,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# sites skip that for one-chapter stories.
# Except it doesn't this time. :-/
url = self.url #+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -224,7 +225,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
# soup = bs.BeautifulSoup(self._fetchUrl(url))
# BeautifulSoup objects to <p> inside <span>, which

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -58,7 +59,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -90,7 +91,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -223,7 +224,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -76,7 +77,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -213,7 +214,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId'))

@ -76,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -133,7 +134,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@ -17,6 +17,7 @@
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2

@ -47,7 +48,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@ -92,13 +93,13 @@ class SVUFictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl,
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False

@ -120,7 +121,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)

@ -145,7 +146,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
logging.debug("URL 2nd try: "+url)
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)

@ -258,7 +259,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2

@@ -43,7 +44,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -81,13 +82,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -102,7 +103,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
             addurl=""

         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -116,7 +117,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
                 # need to log in for this one.
                 addurl = "&ageconsent=ok&warning=4"
                 url = self.url+'&index=1'+addurl
-                logging.debug("Changing URL: "+url)
+                logger.debug("Changing URL: "+url)
                 self.performLogin(url)
                 data = self._fetchUrl(url)

@@ -229,7 +230,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):

     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
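The same two-line change runs through every adapter touched by this commit: a named, per-module logger is declared next to the imports, and the bare logging.* calls move onto it. A minimal sketch of the pattern (the module and call site below are illustrative, not taken from the diff):

    import logging
    logger = logging.getLogger(__name__)  # e.g. fanficdownloader.adapters.adapter_example

    def example():
        # hypothetical call site; the real adapters log URLs and story IDs
        logger.debug("storyId: (%s)" % "1234")

Because named loggers inherit handlers and levels from the fanficdownloader package logger, the CLI, web service, and calibre plugin can each configure output once instead of competing over the root logger.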
@@ -18,6 +18,7 @@
 import datetime
 import time
 import logging
+logger = logging.getLogger(__name__)

 from .. import BeautifulSoup as bs
 from .. import exceptions

@@ -48,7 +49,7 @@ class TestSiteAdapter(BaseSiteAdapter):
     def extractChapterUrlsAndMetadata(self):

         if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")):
-            logging.warn("self.is_adult:%s"%self.is_adult)
+            logger.warn("self.is_adult:%s"%self.is_adult)
             raise exceptions.AdultCheckRequired(self.url)

         if self.story.getMetadata('storyId') == '666':

@@ -184,7 +185,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"


     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
         if self.story.getMetadata('storyId') == '667':
             raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)

@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -76,7 +77,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -198,7 +199,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -80,7 +81,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -182,7 +183,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -71,7 +72,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         # XXX Most sites don't have the /fanfic part. Replace all to remove it usually.

@@ -117,13 +118,13 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -145,7 +146,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -292,7 +293,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         if self.parsedUrl.path.split('/',)[1] == 'wiktt':
             self.story.addToList("category","Harry Potter")

@@ -99,13 +100,13 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -127,7 +128,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -152,7 +153,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
                 # correct stupid &amp; error in url.
                 addurl = addurl.replace("&amp;","&")
                 url = self.url+'&index=1'+addurl
-                logging.debug("URL 2nd try: "+url)
+                logger.debug("URL 2nd try: "+url)

                 try:
                     data = self._fetchUrl(url)

@@ -258,7 +259,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1')

@@ -91,13 +92,13 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "My Account Page" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -110,7 +111,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -223,7 +224,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -51,7 +52,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         # XXX Most sites don't have the part. Replace all to remove it usually.

@@ -101,13 +102,13 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -129,7 +130,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -272,7 +273,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)


         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2

@@ -43,7 +44,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -81,13 +82,13 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -102,7 +103,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
             addurl=""

         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -235,7 +236,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):

     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         data = self._fetchUrl(url)
         # problems with some stories, but only in calibre. I suspect
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 import time

@@ -40,7 +41,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         m = re.match(self.getSiteURLPattern(),url)
         if m:
             self.story.setMetadata('storyId',m.group('id'))
-            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
             # normalized story URL.
             self._setURL("http://"+self.getSiteDomain()\
                          +"/Story-"+self.story.getMetadata('storyId'))

@@ -81,7 +82,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
             return

         loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['urealname']))

         ## need to pull empty login page first to get ctkn and

@@ -98,7 +99,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         d = self._fetchUrl(loginUrl, params)

         if "Stories Published" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -110,7 +111,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         # metadata and chapter list

         url=self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         # tth won't send you future updates if you aren't 'caught up'
         # on the story. Login isn't required for F21, but logging in will

@@ -137,7 +138,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
             form = soup.find('form', {'id':'sitemaxratingform'})
             params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
                     'sitemaxrating':'5'}
-            logging.info("Attempting to get rating cookie for %s" % url)
+            logger.info("Attempting to get rating cookie for %s" % url)
             data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
             # refetch story page.
             data = self._fetchUrl(url)

@@ -158,7 +159,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
         try:
             # going to pull part of the meta data from author list page.
             infourl = 'http://'+self.host+ainfo['href']
-            logging.debug("**StoryInfo** URL: "+infourl)
+            logger.debug("**StoryInfo** URL: "+infourl)
             infodata = self._fetchUrl(infourl)
             infosoup = bs.BeautifulSoup(infodata)

@@ -175,14 +176,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):

         try:
             # going to pull part of the meta data from *primary* author list page.
-            logging.debug("**AUTHOR** URL: "+authorurl)
+            logger.debug("**AUTHOR** URL: "+authorurl)
             authordata = self._fetchUrl(authorurl)
             descurl=authorurl
             authorsoup = bs.BeautifulSoup(authordata)
             # author can have several pages, scan until we find it.
             while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
                 nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
-                logging.debug("**AUTHOR** nextpage URL: "+nextpage)
+                logger.debug("**AUTHOR** nextpage URL: "+nextpage)
                 authordata = self._fetchUrl(nextpage)
                 descurl=nextpage
                 authorsoup = bs.BeautifulSoup(authordata)

@@ -259,7 +260,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):


     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
         soup = bs.BeautifulSoup(self._fetchUrl(url))

         div = soup.find('div', {'id' : 'storyinnerbody'})
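The sitemaxrating hunk above scrapes a per-session ctkn token out of a form and posts it back before refetching the story page. In isolation the exchange looks roughly like this sketch (fetch/post stand in for the adapter's _fetchUrl/_postUrl helpers; the regex scrape is a simplification of the BeautifulSoup lookup in the diff):

    import re

    def set_max_rating(fetch, post, domain):
        page = fetch("http://" + domain + "/")
        # assumes the page embeds <input name="ctkn" value="...">
        m = re.search(r'name=[\'"]ctkn[\'"][^>]*value=[\'"]([^\'"]+)', page)
        if not m:
            raise ValueError("no ctkn token found")
        # sitemaxrating=5 asks the site to show stories of every rating
        return post("http://" + domain + "/setmaxrating.php",
                    {"ctkn": m.group(1), "sitemaxrating": "5"})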
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL. http://www.twilightarchives.com/read/9353
         self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId'))

@@ -77,7 +78,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -172,7 +173,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2

@@ -42,7 +43,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -83,13 +84,13 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -99,7 +100,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
     def extractChapterUrlsAndMetadata(self):

         url = self.url+'&index=1'
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -225,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):

     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         data = self._fetchUrl(url)
         # problems with some stories, but only in calibre. I suspect
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib
 import urllib2

@@ -43,7 +44,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -84,13 +85,13 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                              params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -109,7 +110,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
             addurl=""

         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -252,7 +253,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):

     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         data = self._fetchUrl(url)
         # problems with some stories, but only in calibre. I suspect
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -45,7 +46,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -84,7 +85,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -215,7 +216,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -56,7 +57,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
         # - get chapter list, if not one-shot.

         url = self.url+'&chapter=1'
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         # use BeautifulSoup HTML parser to make everything easier to find.
         try:

@@ -69,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):

         # pull title(title) and author from the HTML title.
         title = soup.find('title').string
-        logging.debug('Title: %s' % title)
+        logger.debug('Title: %s' % title)
         title = title.split('::')[1].strip()
         self.story.setMetadata('title',title.split(' by ')[0].strip())
         self.story.setMetadata('author',title.split(' by ')[1].strip())

@@ -109,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
         # <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font>
         # </td></tr></table>

-        logging.debug("Author URL: "+self.story.getMetadata('authorUrl'))
+        logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
         soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
                                      selfClosingTags=('br')) # normalize <br> tags to <br />
         # find this story in the list, parse it's metadata based on

@@ -212,7 +213,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):

     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -92,13 +93,13 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
         params['submit'] = 'Submit'

         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
-        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                               params['penname']))

         d = self._fetchUrl(loginUrl, params)

         if "Member Account" not in d : #Member Account
-            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                               params['penname']))
             raise exceptions.FailedToLogin(url,params['penname'])
             return False

@@ -120,7 +121,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -145,7 +146,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
                 # correct stupid &amp; error in url.
                 addurl = addurl.replace("&amp;","&")
                 url = self.url+'&index=1'+addurl
-                logging.debug("URL 2nd try: "+url)
+                logger.debug("URL 2nd try: "+url)

                 try:
                     data = self._fetchUrl(url)

@@ -286,7 +287,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -47,7 +48,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -77,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -203,7 +204,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -48,7 +49,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))


         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -86,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -209,7 +210,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulSoup(self._fetchUrl(url))

@@ -17,6 +17,7 @@

 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2


@@ -54,7 +55,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):

         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

@@ -92,7 +93,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)

         try:
             data = self._fetchUrl(url)

@@ -126,7 +127,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
                 # explicitly put ageconsent because google appengine regexp doesn't include it for some reason.
                 addurl = addurl.replace("&amp;","&")+'&ageconsent=ok'
                 url = self.url+'&index=1'+addurl
-                logging.debug("URL 2nd try: "+url)
+                logger.debug("URL 2nd try: "+url)

                 try:
                     data = self._fetchUrl(url)

@@ -147,7 +148,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
         # while len(loopdata) > 0:
         #     if len(loopdata) < 5000:
         #         chklen = len(loopdata)
-        #         logging.info("loopdata: %s" % loopdata[:chklen])
+        #         logger.info("loopdata: %s" % loopdata[:chklen])
         #     loopdata = loopdata[chklen:]

         # use BeautifulSoup HTML parser to make everything easier to find.

@@ -270,7 +271,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):

-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)

         soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                      selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
@@ -27,6 +27,8 @@ from functools import partial
 from .. import BeautifulSoup as bs
 from ..htmlcleanup import stripHTML

+logger = logging.getLogger(__name__)
+
 try:
     from google.appengine.api import apiproxy_stub_map
     def urlfetch_timeout_hook(service, call, request, response):

@@ -38,10 +40,10 @@ try:

     apiproxy_stub_map.apiproxy.GetPreCallHooks().Append(
         'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch')
-    logging.info("Hook to make default deadline 10.0 installed.")
+    logger.info("Hook to make default deadline 10.0 installed.")
 except:
     pass
-    #logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
+    #logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine")

 from ..story import Story
 from ..gziphttp import GZipProcessor

@@ -125,7 +127,7 @@ class BaseSiteAdapter(Configurable):
                 #print code
                 if code == "auto":
                     if not chardet:
-                        logging.info("chardet not available, skipping 'auto' encoding")
+                        logger.info("chardet not available, skipping 'auto' encoding")
                         continue
                     detected = chardet.detect(data)
                     #print detected

@@ -133,12 +135,11 @@ class BaseSiteAdapter(Configurable):
                         code=detected['encoding']
                     else:
                         continue
-                logging.debug("try code:"+code)
                 return data.decode(code)
             except:
-                logging.debug("code failed:"+code)
+                logger.debug("code failed:"+code)
                 pass
-        logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
+        logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
         return "".join([x for x in data if ord(x) < 128])

     # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s

@@ -175,10 +176,10 @@ class BaseSiteAdapter(Configurable):
                 return self._decode(self._fetchUrlRaw(url,parameters))
             except Exception, e:
                 excpt=e
-                logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
+                logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))

-        logging.error("Giving up on %s" %url)
-        logging.exception(excpt)
+        logger.error("Giving up on %s" %url)
+        logger.exception(excpt)
         raise(excpt)

     # Limit chapters to download. Input starts at 1, list starts at 0

@@ -356,7 +357,6 @@ class BaseSiteAdapter(Configurable):

 def cachedfetch(realfetch,cache,url):
     if url in cache:
-        print("cache hit")
         return cache[url]
     else:
         return realfetch(url)
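The _decode hunks above are part of a try-each-codec loop; condensed out of the class, the logic amounts to this py2-style sketch (the codec list is a parameter here, where the real method reads it from configuration):

    import logging
    logger = logging.getLogger(__name__)

    def decode(data, codecs=('utf8', 'Windows-1252')):
        for code in codecs:
            try:
                return data.decode(code)
            except:
                logger.debug("code failed:" + code)
        logger.info("Could not decode story, tried:%s Stripping non-ASCII." % (codecs,))
        return "".join([x for x in data if ord(x) < 128])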
@@ -27,6 +27,8 @@ import logging
 from ..configurable import Configurable
 from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML

+logger = logging.getLogger(__name__)
+
 class BaseStoryWriter(Configurable):

     @staticmethod

@@ -137,11 +139,11 @@ class BaseStoryWriter(Configurable):
             if self.hasConfig(entry+"_label"):
                 label=self.getConfig(entry+"_label")
             elif entry in self.titleLabels:
-                logging.debug("Using fallback label for %s_label"%entry)
+                logger.debug("Using fallback label for %s_label"%entry)
                 label=self.titleLabels[entry]
             else:
                 label="%s"%entry.title()
-                logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
+                logger.debug("No known label for %s, fallback to '%s'"%(entry,label))

             # If the label for the title entry is empty, use the
             # 'no title' option if there is one.

@@ -202,7 +204,7 @@ class BaseStoryWriter(Configurable):

         if not outstream:
             close=True
-            logging.info("Save directly to file: %s" % outfilename)
+            logger.info("Save directly to file: %s" % outfilename)
             if self.getConfig('make_directories'):
                 path=""
                 outputdirs = os.path.dirname(outfilename).split('/')

@@ -229,7 +231,7 @@ class BaseStoryWriter(Configurable):
             outstream = open(outfilename,"wb")
         else:
             close=False
-            logging.debug("Save to stream")
+            logger.debug("Save to stream")

         if not metaonly:
             self.story = self.adapter.getStory() # get full story now,

@@ -256,7 +258,7 @@ class BaseStoryWriter(Configurable):
             outstream.close()

     def writeFile(self, filename, data):
-        logging.debug("writeFile:%s"%filename)
+        logger.debug("writeFile:%s"%filename)

         if self.getConfig('zip_output'):
             outputdirs = os.path.dirname(self.getBaseFileName())
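The _label hunks above sit inside a three-step fallback. Pulled out of the class, it amounts to this sketch (hasConfig/getConfig are reduced to a plain dict for illustration):

    import logging
    logger = logging.getLogger(__name__)

    def label_for(entry, config, title_labels):
        if entry + "_label" in config:     # explicit ini override wins
            return config[entry + "_label"]
        if entry in title_labels:          # then the built-in label table
            logger.debug("Using fallback label for %s_label" % entry)
            return title_labels[entry]
        label = "%s" % entry.title()       # last resort: prettify the key
        logger.debug("No known label for %s, fallback to '%s'" % (entry, label))
        return label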
@@ -29,6 +29,8 @@ from xml.dom.minidom import parse, parseString, getDOMImplementation
 from base_writer import *
 from ..htmlcleanup import stripHTML

+logger = logging.getLogger(__name__)
+
 class EpubWriter(BaseStoryWriter):

     @staticmethod

@@ -205,8 +207,8 @@ div { margin: 0pt; padding: 0pt; }
         # if there's a self.story.logfile, there's an existing log
         # to add to.
         if self.story.logfile:
-            logging.debug("existing logfile found, appending")
-            logging.debug("existing data:%s"%self._getLastLogData(self.story.logfile))
+            logger.debug("existing logfile found, appending")
+            logger.debug("existing data:%s"%self._getLastLogData(self.story.logfile))
             replace_string = "</body>" # "</h3>"
             self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string))
         else:

@@ -260,11 +262,11 @@ div { margin: 0pt; padding: 0pt; }
             if self.hasConfig(entry+"_label"):
                 label=self.getConfig(entry+"_label")
             elif entry in self.titleLabels:
-                logging.debug("Using fallback label for %s_label"%entry)
+                logger.debug("Using fallback label for %s_label"%entry)
                 label=self.titleLabels[entry]
             else:
                 label="%s"%entry.title()
-                logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
+                logger.debug("No known label for %s, fallback to '%s'"%(entry,label))

             retval = retval + ENTRY.substitute({'id':entry,
                                                 'label':label,

@@ -415,7 +417,7 @@ div { margin: 0pt; padding: 0pt; }

         coverimgid = "image0000"
         if not self.story.cover and self.story.oldcover:
-            print("writer_epub: no new cover, has old cover, write image.")
+            logger.debug("writer_epub: no new cover, has old cover, write image.")
             (oldcoverhtmlhref,
              oldcoverhtmltype,
              oldcoverhtmldata,

@@ -646,7 +648,7 @@ div { margin: 0pt; padding: 0pt; }

         for index, (title,html) in enumerate(self.story.getChapters()):
             if html:
-                logging.debug('Writing chapter text for: %s' % title)
+                logger.debug('Writing chapter text for: %s' % title)
                 vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
                 fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
                 # ffnet(& maybe others) gives the whole chapter text
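The update-log hunk above appends to an existing XHTML log by splicing the new entry in just before the closing tag. The trick in isolation (function and argument names are illustrative):

    def append_log_entry(logfile_html, new_entry):
        # insert the entry ahead of </body> so the document stays well-formed
        replace_string = "</body>"
        return logfile_html.replace(replace_string, new_entry + replace_string)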