diff --git a/app.yaml b/app.yaml
index a800ef44..c38a2609 100644
--- a/app.yaml
+++ b/app.yaml
@@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
-version: 4-4-27
+version: 4-4-29
runtime: python27
api_version: 1
threadsafe: true
diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py
index 98649e62..ea81d698 100644
--- a/calibre-plugin/__init__.py
+++ b/calibre-plugin/__init__.py
@@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
-version = (1, 6, 11)
+version = (1, 6, 14)
minimum_calibre_version = (0, 8, 57)

#: This field defines the GUI plugin class that contains all the code
diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py
index 4980d357..1d334712 100644
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@@ -496,7 +496,10 @@ make_firstimage_cover:true
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
book['publisher'] = story.getMetadata("site")
book['tags'] = story.getSubjectTags(removeallentities=True)
-book['comments'] = sanitize_comments_html(story.getMetadata("description"))
+if story.getMetadata("description"):
+    book['comments'] = sanitize_comments_html(story.getMetadata("description"))
+else:
+    book['comments']=''
book['series'] = story.getMetadata("series", removeallentities=True)

# adapter.opener is the element with a threadlock. But del
@@ -538,7 +541,7 @@ make_firstimage_cover:true
print("from URL(%s)"%url)

# try to find by identifier url first.
-searchstr = 'identifiers:"=url:%s"'%url.replace(":","|")
+searchstr = 'identifiers:"=url:=%s"'%url.replace(":","|")
identicalbooks = db.search_getting_ids(searchstr, None)
if len(identicalbooks) < 1:
# find dups
@@ -634,9 +637,12 @@ make_firstimage_cover:true
if book['good']: # there shouldn't be any !'good' books at this point.
# if still 'good', make a temp file to write the output to.
-tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'],
-                              suffix='.'+options['fileform'],
-                              dir=options['tdir'])
+# For HTML format users, make the filename inside the zip something reasonable.
+# For crazy long titles/authors, limit it to 200chars.
+# For weird/OS-unsafe characters, use file safe only.
+tmp = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
+                              suffix='.'+options['fileform'],
+                              dir=options['tdir'])

print("title:"+book['title'])
print("outfile:"+tmp.name)
book['outfile'] = tmp.name
diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py
index 2fad2e10..89d390f4 100644
--- a/calibre-plugin/jobs.py
+++ b/calibre-plugin/jobs.py
@@ -14,7 +14,6 @@ from StringIO import StringIO

from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
-from calibre.utils.logging import Log

from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
@@ -109,9 +108,6 @@ def do_download_for_worker(book,options):
when run as a worker job
'''
try:
-# import logging
-# logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
-
book['comment'] = 'Download started...'
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
@@ -123,7 +119,7 @@ def do_download_for_worker(book,options):

# images only for epub, even if the user mistakenly turned it
# on else where.
-if options['fileform'] != "epub":
+if options['fileform'] not in ("epub","html"):
configuration.set("overrides","include_images","false")

adapter = adapters.getAdapter(configuration,book['url'])
diff --git a/defaults.ini b/defaults.ini
index 0ae23ba9..88c211e8 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -161,8 +161,10 @@ extratags: FanFiction
## for regexp details.
## Make sure to keep at least one space at the start of each line and
## to escape % to %%, if used.
-## Two or three part lines. Two part effect everything.
+## Two, three or five part lines. Two part effect everything.
## Three part effect only those key(s) lists.
+## *Five* part lines. Effect only when trailing conditional key=>regexp matches
+## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
#replace_metadata:
# genre,category=>Sci-Fi=>SF
# Puella Magi Madoka Magica.* => Madoka
@@ -170,7 +172,9 @@ extratags: FanFiction
# Crossover: (.*)=>\1
# title=>(.*)Great(.*)=>\1Moderate\2
# .*-Centered=>
-
+# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers
+# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural
+
## Some readers don't show horizontal rule (<hr />) inside , which
diff --git a/fanficdownloader/adapters/adapter_squidgeorgpeja.py b/fanficdownloader/adapters/adapter_squidgeorgpeja.py
index 07d86989..6416d5f9 100644
--- a/fanficdownloader/adapters/adapter_squidgeorgpeja.py
+++ b/fanficdownloader/adapters/adapter_squidgeorgpeja.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -58,7 +59,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -90,7 +91,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -223,7 +224,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_stargateatlantisorg.py b/fanficdownloader/adapters/adapter_stargateatlantisorg.py
index 5cc156c5..f8c52bad 100644
--- a/fanficdownloader/adapters/adapter_stargateatlantisorg.py
+++ b/fanficdownloader/adapters/adapter_stargateatlantisorg.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -76,7 +77,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -213,7 +214,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_storiesofardacom.py b/fanficdownloader/adapters/adapter_storiesofardacom.py
index 7bb999b4..ff44d84e 100644
--- a/fanficdownloader/adapters/adapter_storiesofardacom.py
+++ b/fanficdownloader/adapters/adapter_storiesofardacom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId'))
@@ -76,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -133,7 +134,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_svufictioncom.py b/fanficdownloader/adapters/adapter_svufictioncom.py
index 387188ab..2efd863a 100644
--- a/fanficdownloader/adapters/adapter_svufictioncom.py
+++ b/fanficdownloader/adapters/adapter_svufictioncom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -92,13 +93,13 @@ class SVUFictionComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -120,7 +121,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -145,7 +146,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
- logging.debug("URL 2nd try: "+url)
+ logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@@ -258,7 +259,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
index 6203aa95..b3f7a078 100644
--- a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
+++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@@ -43,7 +44,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -81,13 +82,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -102,7 +103,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -116,7 +117,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
# need to log in for this one.
addurl = "&ageconsent=ok&warning=4"
url = self.url+'&index=1'+addurl
- logging.debug("Changing URL: "+url)
+ logger.debug("Changing URL: "+url)
self.performLogin(url)
data = self._fetchUrl(url)
@@ -229,7 +230,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index 222ffb5f..3fdc0bec 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -18,6 +18,7 @@
import datetime
import time
import logging
+logger = logging.getLogger(__name__)
from .. import BeautifulSoup as bs
from .. import exceptions
@@ -48,7 +49,7 @@ class TestSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")):
- logging.warn("self.is_adult:%s"%self.is_adult)
+ logger.warn("self.is_adult:%s"%self.is_adult)
raise exceptions.AdultCheckRequired(self.url)
if self.story.getMetadata('storyId') == '666':
@@ -128,7 +129,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
self.story.addToList('genre','Fantasy')
self.story.addToList('genre','Comedy')
- self.story.addToList('genre','SF')
+ self.story.addToList('genre','Sci-Fi')
self.story.addToList('genre','Noir')
self.story.addToList('characters','Bob Smith')
@@ -184,7 +185,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
if self.story.getMetadata('storyId') == '667':
raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)
diff --git a/fanficdownloader/adapters/adapter_thealphagatecom.py b/fanficdownloader/adapters/adapter_thealphagatecom.py
index 98ec91d4..ff6ee8a8 100644
--- a/fanficdownloader/adapters/adapter_thealphagatecom.py
+++ b/fanficdownloader/adapters/adapter_thealphagatecom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -76,7 +77,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -198,7 +199,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_thehexfilesnet.py b/fanficdownloader/adapters/adapter_thehexfilesnet.py
index bc515cb7..c191df74 100644
--- a/fanficdownloader/adapters/adapter_thehexfilesnet.py
+++ b/fanficdownloader/adapters/adapter_thehexfilesnet.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -80,7 +81,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -182,7 +183,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_thehookupzonenet.py b/fanficdownloader/adapters/adapter_thehookupzonenet.py
index 93f4b852..a43ecb1e 100644
--- a/fanficdownloader/adapters/adapter_thehookupzonenet.py
+++ b/fanficdownloader/adapters/adapter_thehookupzonenet.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -71,7 +72,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
@@ -117,13 +118,13 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -145,7 +146,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -292,7 +293,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_themasquenet.py b/fanficdownloader/adapters/adapter_themasquenet.py
index cc99f879..f9690103 100644
--- a/fanficdownloader/adapters/adapter_themasquenet.py
+++ b/fanficdownloader/adapters/adapter_themasquenet.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,20 +48,20 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
if self.parsedUrl.path.split('/',)[1] == 'wiktt':
self.story.addToList("category","Harry Potter")
- self.story.setMetadata('section','/wiktt/efiction/')
+ self.section='/wiktt/efiction/'
self.dateformat = "%m/%d/%Y"
else:
self.story.addToList("category","Originals")
- self.story.setMetadata('section','/efiction/')
+ self.section='/efiction/'
self.dateformat = "%b %d, %Y"
# normalized story URL.
- self._setURL('http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'viewstory.php?sid='+self.story.getMetadata('storyId'))
+ self._setURL('http://' + self.getSiteDomain() + self.section + 'viewstory.php?sid='+self.story.getMetadata('storyId'))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','msq')
@@ -98,14 +99,14 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
- loginUrl = 'http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login'
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -127,7 +128,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -152,7 +153,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
- logging.debug("URL 2nd try: "+url)
+ logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@@ -186,7 +187,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
# just in case there's tags, like in chapter titles.
- self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.story.getMetadata('section') + chapter['href']+addurl))
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.section + chapter['href']+addurl))
self.story.setMetadata('numChapters',len(self.chapterUrls))
@@ -258,7 +259,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py
index 11c53cdc..7a7a2c17 100644
--- a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py
+++ b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1')
@@ -91,13 +92,13 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "My Account Page" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -110,7 +111,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -223,7 +224,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py
index 2c3533d9..fc5d48e9 100644
--- a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py
+++ b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -51,7 +52,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
# XXX Most sites don't have the part. Replace all to remove it usually.
@@ -101,13 +102,13 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -129,7 +130,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -272,7 +273,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py
index c5542050..8ed953d7 100644
--- a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py
+++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@@ -43,7 +44,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -81,13 +82,13 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -102,7 +103,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -235,7 +236,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect
diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py
index 8c3047e6..8f40568a 100644
--- a/fanficdownloader/adapters/adapter_tthfanficorg.py
+++ b/fanficdownloader/adapters/adapter_tthfanficorg.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
import time
@@ -40,7 +41,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL("http://"+self.getSiteDomain()\
+"/Story-"+self.story.getMetadata('storyId'))
@@ -81,7 +82,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
return
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['urealname']))
## need to pull empty login page first to get ctkn and
@@ -98,7 +99,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
d = self._fetchUrl(loginUrl, params)
if "Stories Published" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -110,7 +111,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# metadata and chapter list
url=self.url
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
# tth won't send you future updates if you aren't 'caught up'
# on the story. Login isn't required for F21, but logging in will
@@ -137,7 +138,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
form = soup.find('form', {'id':'sitemaxratingform'})
params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
'sitemaxrating':'5'}
- logging.info("Attempting to get rating cookie for %s" % url)
+ logger.info("Attempting to get rating cookie for %s" % url)
data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
# refetch story page.
data = self._fetchUrl(url)
@@ -158,7 +159,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from author list page.
infourl = 'http://'+self.host+ainfo['href']
- logging.debug("**StoryInfo** URL: "+infourl)
+ logger.debug("**StoryInfo** URL: "+infourl)
infodata = self._fetchUrl(infourl)
infosoup = bs.BeautifulSoup(infodata)
@@ -175,14 +176,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from *primary* author list page.
- logging.debug("**AUTHOR** URL: "+authorurl)
+ logger.debug("**AUTHOR** URL: "+authorurl)
authordata = self._fetchUrl(authorurl)
descurl=authorurl
authorsoup = bs.BeautifulSoup(authordata)
# author can have several pages, scan until we find it.
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
- logging.debug("**AUTHOR** nextpage URL: "+nextpage)
+ logger.debug("**AUTHOR** nextpage URL: "+nextpage)
authordata = self._fetchUrl(nextpage)
descurl=nextpage
authorsoup = bs.BeautifulSoup(authordata)
@@ -259,7 +260,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url))
div = soup.find('div', {'id' : 'storyinnerbody'})
diff --git a/fanficdownloader/adapters/adapter_twilightarchivescom.py b/fanficdownloader/adapters/adapter_twilightarchivescom.py
index f9003260..3f2eef7a 100644
--- a/fanficdownloader/adapters/adapter_twilightarchivescom.py
+++ b/fanficdownloader/adapters/adapter_twilightarchivescom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL. http://www.twilightarchives.com/read/9353
self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId'))
@@ -77,7 +78,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -172,7 +173,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py
index 4e1e2dd1..8f46aa33 100644
--- a/fanficdownloader/adapters/adapter_twilightednet.py
+++ b/fanficdownloader/adapters/adapter_twilightednet.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@@ -42,7 +43,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -83,13 +84,13 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -99,7 +100,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.url+'&index=1'
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -225,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect
diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py
index 415b7f5d..fd456ccd 100644
--- a/fanficdownloader/adapters/adapter_twiwritenet.py
+++ b/fanficdownloader/adapters/adapter_twiwritenet.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
@@ -43,7 +44,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -84,13 +85,13 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -109,7 +110,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
addurl=""
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -252,7 +253,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
# problems with some stories, but only in calibre. I suspect
diff --git a/fanficdownloader/adapters/adapter_walkingtheplankorg.py b/fanficdownloader/adapters/adapter_walkingtheplankorg.py
index e3885387..53357da9 100644
--- a/fanficdownloader/adapters/adapter_walkingtheplankorg.py
+++ b/fanficdownloader/adapters/adapter_walkingtheplankorg.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -45,7 +46,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -84,7 +85,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -215,7 +216,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py
index cc22267e..6f396f7b 100644
--- a/fanficdownloader/adapters/adapter_whoficcom.py
+++ b/fanficdownloader/adapters/adapter_whoficcom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -56,7 +57,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# - get chapter list, if not one-shot.
url = self.url+'&chapter=1'
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
@@ -69,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# pull title(title) and author from the HTML title.
title = soup.find('title').string
- logging.debug('Title: %s' % title)
+ logger.debug('Title: %s' % title)
title = title.split('::')[1].strip()
self.story.setMetadata('title',title.split(' by ')[0].strip())
self.story.setMetadata('author',title.split(' by ')[1].strip())
@@ -109,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# Published: 2010.08.15 - Updated: 2010.08.16 - Chapters: 4 - Completed: Yes - Word Count: 4890
#
- logging.debug("Author URL: "+self.story.getMetadata('authorUrl'))
+ logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
selfClosingTags=('br')) # normalize <br> tags to <br />
# find this story in the list, parse it's metadata based on
@@ -212,7 +213,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_wizardtalesnet.py b/fanficdownloader/adapters/adapter_wizardtalesnet.py
index 419dc548..b00df831 100644
--- a/fanficdownloader/adapters/adapter_wizardtalesnet.py
+++ b/fanficdownloader/adapters/adapter_wizardtalesnet.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -92,13 +93,13 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
- logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
- logging.info("Failed to login to URL %s as %s" % (loginUrl,
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
raise exceptions.FailedToLogin(url,params['penname'])
return False
@@ -120,7 +121,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -145,7 +146,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# correct stupid &amp; error in url.
addurl = addurl.replace("&amp;","&")
url = self.url+'&index=1'+addurl
- logging.debug("URL 2nd try: "+url)
+ logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@@ -286,7 +287,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_wolverineandroguecom.py b/fanficdownloader/adapters/adapter_wolverineandroguecom.py
index 37b09a73..24a1bd03 100644
--- a/fanficdownloader/adapters/adapter_wolverineandroguecom.py
+++ b/fanficdownloader/adapters/adapter_wolverineandroguecom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -47,7 +48,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -77,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -203,7 +204,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/adapter_wraithbaitcom.py b/fanficdownloader/adapters/adapter_wraithbaitcom.py
index 9f077a8a..f827d7e0 100644
--- a/fanficdownloader/adapters/adapter_wraithbaitcom.py
+++ b/fanficdownloader/adapters/adapter_wraithbaitcom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -48,7 +49,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -86,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -209,7 +210,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url))
diff --git a/fanficdownloader/adapters/adapter_yourfanfictioncom.py b/fanficdownloader/adapters/adapter_yourfanfictioncom.py
index 44864e16..f11e7a3f 100644
--- a/fanficdownloader/adapters/adapter_yourfanfictioncom.py
+++ b/fanficdownloader/adapters/adapter_yourfanfictioncom.py
@@ -17,6 +17,7 @@
import time
import logging
+logger = logging.getLogger(__name__)
import re
import urllib2
@@ -54,7 +55,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
- logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+ logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
@@ -92,7 +93,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
- logging.debug("URL: "+url)
+ logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@@ -126,7 +127,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# explicitly put ageconsent because google appengine regexp doesn't include it for some reason.
addurl = addurl.replace("&amp;","&")+'&ageconsent=ok'
url = self.url+'&index=1'+addurl
- logging.debug("URL 2nd try: "+url)
+ logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
@@ -147,7 +148,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# while len(loopdata) > 0:
# if len(loopdata) < 5000:
# chklen = len(loopdata)
- # logging.info("loopdata: %s" % loopdata[:chklen])
+ # logger.info("loopdata: %s" % loopdata[:chklen])
# loopdata = loopdata[chklen:]
# use BeautifulSoup HTML parser to make everything easier to find.
@@ -270,7 +271,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
- logging.debug('Getting chapter text from: %s' % url)
+ logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index 7ca9686f..3cfc7ef4 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -27,6 +27,8 @@ from functools import partial
from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
+logger = logging.getLogger(__name__)
+
try:
from google.appengine.api import apiproxy_stub_map
def urlfetch_timeout_hook(service, call, request, response):
@@ -38,10 +40,10 @@ try:
apiproxy_stub_map.apiproxy.GetPreCallHooks().Append(
'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch')
- logging.info("Hook to make default deadline 10.0 installed.")
+ logger.info("Hook to make default deadline 10.0 installed.")
except:
pass
- #logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
+ #logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
from ..story import Story
from ..gziphttp import GZipProcessor
@@ -125,7 +127,7 @@ class BaseSiteAdapter(Configurable):
#print code
if code == "auto":
if not chardet:
- logging.info("chardet not available, skipping 'auto' encoding")
+ logger.info("chardet not available, skipping 'auto' encoding")
continue
detected = chardet.detect(data)
#print detected
@@ -133,12 +135,11 @@ class BaseSiteAdapter(Configurable):
code=detected['encoding']
else:
continue
- logging.debug("try code:"+code)
return data.decode(code)
except:
- logging.debug("code failed:"+code)
+ logger.debug("code failed:"+code)
pass
- logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
+ logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
@@ -175,10 +176,10 @@ class BaseSiteAdapter(Configurable):
return self._decode(self._fetchUrlRaw(url,parameters))
except Exception, e:
excpt=e
- logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
+ logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
- logging.error("Giving up on %s" %url)
- logging.exception(excpt)
+ logger.error("Giving up on %s" %url)
+ logger.exception(excpt)
raise(excpt)
# Limit chapters to download. Input starts at 1, list starts at 0
@@ -304,7 +305,7 @@ class BaseSiteAdapter(Configurable):
if not fetch:
fetch=self._fetchUrlRaw
- acceptable_attributes = ['href','name']
+ acceptable_attributes = ['href','name','class','id']
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt','longdesc'))
@@ -356,7 +357,6 @@ class BaseSiteAdapter(Configurable):
def cachedfetch(realfetch,cache,url):
if url in cache:
- print("cache hit")
return cache[url]
else:
return realfetch(url)
diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py
index a909ec68..24296c4d 100644
--- a/fanficdownloader/configurable.py
+++ b/fanficdownloader/configurable.py
@@ -134,7 +134,7 @@ class Configurable(object):
return self.configuration.hasConfig(key)
def getConfig(self, key, default=""):
- return self.configuration.getConfig(key)
+ return self.configuration.getConfig(key,default)
def getConfigList(self, key):
return self.configuration.getConfigList(key)
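
The configurable.py fix above makes a caller-supplied default actually reach the underlying lookup instead of being dropped. A minimal sketch of the effect (the option name is illustrative; Configurable is the base class of the adapters in this patch):

    # before: the second argument was ignored, so an unset option came back
    # as the underlying empty-string default; now the caller's default wins.
    value = adapter.getConfig('add_chapter_numbers', 'false')
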
diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py
index 733d8358..f176d509 100644
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@@ -20,6 +20,8 @@ import urlparse
import string
from math import floor
from functools import partial
+import logging
+import urlparse as up
import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
@@ -52,7 +54,7 @@ try:
if export:
return (img.export('JPG'),'jpg','image/jpeg')
else:
- print("image used unchanged")
+ logging.debug("image used unchanged")
return (data,'jpg','image/jpeg')
except:
@@ -88,23 +90,34 @@ except:
img.save(outsio,'JPEG')
return (outsio.getvalue(),'jpg','image/jpeg')
else:
- print("image used unchanged")
+ logging.debug("image used unchanged")
return (data,'jpg','image/jpeg')
except:
-
# No calibre or PIL, simple pass through with mimetype.
- imagetypes = {
- 'jpg':'image/jpeg',
- 'jpeg':'image/jpeg',
- 'png':'image/png',
- 'gif':'image/gif',
- 'svg':'image/svg+xml',
- }
-
def convert_image(url,data,sizes,grayscale):
- ext=url[url.rfind('.')+1:].lower()
- return (data,ext,imagetypes[ext])
+ return no_convert_image(url,data)
+
+imagetypes = {
+ 'jpg':'image/jpeg',
+ 'jpeg':'image/jpeg',
+ 'png':'image/png',
+ 'gif':'image/gif',
+ 'svg':'image/svg+xml',
+ }
+
+## also used for explicit no image processing.
+def no_convert_image(url,data):
+ parsedUrl = up.urlparse(url)
+
+ ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()
+
+ if ext not in imagetypes:
+ logging.debug("no_convert_image url:%s - no known extension"%url)
+ # doesn't have extension? use jpg.
+ ext='jpg'
+
+ return (data,ext,imagetypes[ext])
def normalize_format_name(fmt):
if fmt:
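
no_convert_image() above derives the extension from the parsed URL path and falls back to jpg, so query-style image URLs no longer hit the KeyError the old rfind('.') slice could produce. A small sketch (the URL and bytes are made up):

    # path is '/image.php'; 'php' is not in imagetypes, so ext falls back
    rawbytes = fetch('http://example.com/image.php?id=42')  # hypothetical fetch
    data, ext, mime = no_convert_image('http://example.com/image.php?id=42', rawbytes)
    # ext == 'jpg', mime == 'image/jpeg'
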
@@ -240,24 +253,35 @@ class Story(Configurable):
## Three part effect only those key(s) lists.
## pattern=>replacement
## metakey,metakey=>pattern=>replacement
+ ## *Five* part lines. Effect only when trailing conditional key=>regexp matches
+ ## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
def setReplace(self,replace):
for line in replace.splitlines():
+ if "&&" in line:
+ (line,conditional) = map( lambda x: x.strip(), line.split("&&") )
+ condparts = map( lambda x: x.strip(), conditional.split("=>") )
+ else:
+ condparts=[None,None]
if "=>" in line:
parts = map( lambda x: x.strip(), line.split("=>") )
if len(parts) > 2:
parts[0] = map( lambda x: x.strip(), parts[0].split(",") )
- self.replacements.append(parts)
+ self.replacements.append(parts+condparts)
else:
- self.replacements.append([None]+parts)
+ self.replacements.append([None]+parts+condparts)
def doReplacments(self,value,key):
- for (keys,p,v) in self.replacements:
+ for (keys,regexp,replacement,condkey,condregexp) in self.replacements:
if (keys == None or key in keys) \
and isinstance(value,basestring) \
- and re.search(p,value):
- #pv=value
- value = re.sub(p,v,value)
- #print("change:%s => %s === %s => %s "%(p,v,pv,value))
+ and re.search(regexp,value):
+ doreplace=True
+ if condkey:
+ condval = self.getMetadata(condkey)
+ doreplace = condval != None and re.search(condregexp,condval)
+
+ if doreplace:
+ value = re.sub(regexp,replacement,value)
return value
def getMetadataRaw(self,key):
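
To make the five-part form concrete, here is how setReplace above decomposes one line; the key names and patterns below are invented for illustration, not taken from this commit:

    line = 'characters=>Jim=>James&&category=>SomeFandom'
    if '&&' in line:
        line, conditional = [x.strip() for x in line.split('&&')]
        condparts = [x.strip() for x in conditional.split('=>')]
    else:
        condparts = [None, None]
    parts = [x.strip() for x in line.split('=>')]
    parts[0] = [x.strip() for x in parts[0].split(',')]
    print(parts + condparts)
    # [['characters'], 'Jim', 'James', 'category', 'SomeFandom']
    # doReplacments then applies Jim -> James to 'characters' only when
    # the story's 'category' metadata matches 'SomeFandom'.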
@@ -280,7 +304,9 @@ class Story(Configurable):
value = commaGroups(value)
if key == "numChapters":
value = commaGroups("%d"%value)
- if key in ("dateCreated","datePublished","dateUpdated"):
+ if key in ("dateCreated"):

+ value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S"))
+ if key in ("datePublished","dateUpdated"):
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
if doreplacements:
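
The split above gives dateCreated a seconds-resolution default while datePublished and dateUpdated stay date-only; each remains overridable through a <key>_format ini entry, which is what the getConfig default fix earlier in this commit enables. Concretely:

    import datetime
    d = datetime.datetime(2012, 9, 1, 14, 30, 5)   # example value
    print(d.strftime('%Y-%m-%d %H:%M:%S'))  # dateCreated default: 2012-09-01 14:30:05
    print(d.strftime('%Y-%m-%d'))           # datePublished/dateUpdated: 2012-09-01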
@@ -411,11 +437,14 @@ class Story(Configurable):
title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title)
self.chapters.append( (title,html) )
- def getChapters(self):
+ def getChapters(self,fortoc=False):
"Chapters will be tuples of (title,html)"
retval = []
- if self.getConfig('add_chapter_numbers') and \
- self.getConfig('chapter_title_add_pattern'):
+ ## only add numbers if more than one chapter.
+ if len(self.chapters) > 1 and \
+ (self.getConfig('add_chapter_numbers') == "true" \
+ or (self.getConfig('add_chapter_numbers') == "toconly" and fortoc)) \
+ and self.getConfig('chapter_title_add_pattern'):
for index, (title,html) in enumerate(self.chapters):
retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
else:
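
add_chapter_numbers now accepts 'toconly' in addition to 'true': numbering is applied only when getChapters(fortoc=True) is called while building the table of contents, so chapter bodies keep their plain titles, and single-chapter stories are never numbered. The gate, reduced to a standalone sketch:

    def should_number(num_chapters, add_chapter_numbers, fortoc):
        # mirrors the condition in getChapters above; the real code also
        # requires chapter_title_add_pattern to be set
        return num_chapters > 1 and \
            (add_chapter_numbers == 'true'
             or (add_chapter_numbers == 'toconly' and fortoc))

    print(should_number(12, 'toconly', True))    # True:  numbered in the TOC
    print(should_number(12, 'toconly', False))   # False: plain titles in the body
    print(should_number(1,  'true',    True))    # False: single chapter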
@@ -480,17 +509,22 @@ class Story(Configurable):
prefix='ffdl'
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
+
try:
- sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
+ if self.getConfig('no_image_processing'):
+ (data,ext,mime) = no_convert_image(imgurl,
+ fetch(imgurl))
+ else:
+ try:
+ sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
+ except Exception, e:
+ raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
+ (data,ext,mime) = convert_image(imgurl,
+ fetch(imgurl),
+ sizes,
+ self.getConfig('grayscale_images'))
except Exception, e:
- raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
- try:
- (data,ext,mime) = convert_image(imgurl,
- fetch(imgurl),
- sizes,
- self.getConfig('grayscale_images'))
- except Exception, e:
- print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
+ logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
return "failedtoload"
# explicit cover, make the first image.
@@ -525,7 +559,7 @@ class Story(Configurable):
ext)
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
- print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
+ logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
else:
newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc']
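
A hypothetical personal.ini fragment exercising both branches above; the option names appear in this diff, while the section header and values are only illustrative:

    [epub]
    ## fetch images but pass them through untouched; extension/mimetype
    ## are sniffed from the URL by no_convert_image
    no_image_processing: true

    ## with no_image_processing unset, these drive convert_image instead
    #image_max_size: 580, 725
    #grayscale_images: false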
diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py
index 98328713..a2d561a0 100644
--- a/fanficdownloader/writers/base_writer.py
+++ b/fanficdownloader/writers/base_writer.py
@@ -18,6 +18,7 @@
import re
import os.path
import datetime
+import string
import StringIO
import zipfile
from zipfile import ZipFile, ZIP_DEFLATED
@@ -26,6 +27,8 @@ import logging
from ..configurable import Configurable
from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML
+logger = logging.getLogger(__name__)
+
class BaseStoryWriter(Configurable):
@staticmethod
@@ -101,6 +104,22 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use label and value.
"""
if self.getConfig("include_titlepage"):
+
+ if self.hasConfig("titlepage_start"):
+ START = string.Template(self.getConfig("titlepage_start"))
+
+ if self.hasConfig("titlepage_entry"):
+ ENTRY = string.Template(self.getConfig("titlepage_entry"))
+
+ if self.hasConfig("titlepage_end"):
+ END = string.Template(self.getConfig("titlepage_end"))
+
+ if self.hasConfig("titlepage_wide_entry"):
+ WIDE_ENTRY = string.Template(self.getConfig("titlepage_wide_entry"))
+
+ if self.hasConfig("titlepage_no_title_entry"):
+ NO_TITLE_ENTRY = string.Template(self.getConfig("titlepage_no_title_entry"))
+
self._write(out,START.substitute(self.story.getAllMetadata()))
if WIDE_ENTRY==None:
@@ -120,11 +139,11 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
elif entry in self.titleLabels:
- logging.debug("Using fallback label for %s_label"%entry)
+ logger.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
else:
label="%s"%entry.title()
- logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
+ logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
@@ -132,6 +151,7 @@ class BaseStoryWriter(Configurable):
TEMPLATE= NO_TITLE_ENTRY
self._write(out,TEMPLATE.substitute({'label':label,
+ 'id':entry,
'value':self.story.getMetadata(entry)}))
else:
self._write(out, entry)
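
The hasConfig checks above let an ini file replace the writer's built-in titlepage templates wholesale, and the substitute call now also exposes ${id} (the metadata key itself) alongside ${label} and ${value}. A hypothetical override using it (the section name and markup are illustrative):

    [epub]
    titlepage_entry: <div class="${id}"><b>${label}:</b> ${value}</div>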
@@ -146,11 +166,22 @@ class BaseStoryWriter(Configurable):
"""
# Only do TOC if there's more than one chapter and it's configured.
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
+ if self.hasConfig("tocpage_start"):
+ START = string.Template(self.getConfig("tocpage_start"))
+
+ if self.hasConfig("tocpage_entry"):
+ ENTRY = string.Template(self.getConfig("tocpage_entry"))
+
+ if self.hasConfig("tocpage_end"):
+ END = string.Template(self.getConfig("tocpage_end"))
+
self._write(out,START.substitute(self.story.getAllMetadata()))
- for index, (title,html) in enumerate(self.story.getChapters()):
+ for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)):
if html:
- self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
+ self._write(out,ENTRY.substitute({'chapter':title,
+ 'number':index+1,
+ 'index':"%04d"%(index+1)}))
self._write(out,END.substitute(self.story.getAllMetadata()))
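
tocpage_start/tocpage_entry/tocpage_end get the same ini-override treatment, TOC entries gain a plain 1-based ${number} next to the zero-padded ${index}, and passing fortoc=True means 'toconly' chapter numbering shows up here and only here. A hypothetical entry template (section name and markup illustrative):

    [html]
    tocpage_entry: <li><a href="#section${index}">${number}. ${chapter}</a></li>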
@@ -161,6 +192,8 @@ class BaseStoryWriter(Configurable):
if outfilename == None:
outfilename=self.getOutputFileName()
+ self.outfilename = outfilename
+
# minor cheat, tucking css into metadata.
if self.getConfig("output_css"):
self.story.setMetadata("output_css",
@@ -171,11 +204,11 @@ class BaseStoryWriter(Configurable):
if not outstream:
close=True
- logging.info("Save directly to file: %s" % outfilename)
+ logger.info("Save directly to file: %s" % outfilename)
if self.getConfig('make_directories'):
path=""
- dirs = os.path.dirname(outfilename).split('/')
- for dir in dirs:
+ outputdirs = os.path.dirname(outfilename).split('/')
+ for dir in outputdirs:
path+=dir+"/"
if not os.path.exists(path):
os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2?
@@ -198,7 +231,7 @@ class BaseStoryWriter(Configurable):
outstream = open(outfilename,"wb")
else:
close=False
- logging.debug("Save to stream")
+ logger.debug("Save to stream")
if not metaonly:
self.story = self.adapter.getStory() # get full story now,
@@ -209,14 +242,14 @@ class BaseStoryWriter(Configurable):
# fetch once.
if self.getConfig('zip_output'):
out = StringIO.StringIO()
+ self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
self.writeStoryImpl(out)
- zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
- zipout.writestr(self.getBaseFileName(),out.getvalue())
+ self.zipout.writestr(self.getBaseFileName(),out.getvalue())
# declare all the files as created by Windows; otherwise, when this
# runs on appengine, Windows unzips the files with 000 perms.
- for zf in zipout.filelist:
+ for zf in self.zipout.filelist:
zf.create_system = 0
- zipout.close()
+ self.zipout.close()
out.close()
else:
self.writeStoryImpl(outstream)
@@ -224,6 +257,27 @@ class BaseStoryWriter(Configurable):
if close:
outstream.close()
+ def writeFile(self, filename, data):
+ logger.debug("writeFile:%s"%filename)
+
+ if self.getConfig('zip_output'):
+ outputdirs = os.path.dirname(self.getBaseFileName())
+ if outputdirs:
+ filename=outputdirs+'/'+filename
+ self.zipout.writestr(filename,data)
+ else:
+ outputdirs = os.path.dirname(self.outfilename)
+ if outputdirs:
+ filename=outputdirs+'/'+filename
+
+ dir = os.path.dirname(filename)
+ if dir and not os.path.exists(dir):
+ os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2?
+
+ outstream = open(filename,"wb")
+ outstream.write(data)
+ outstream.close()
+
def writeStoryImpl(self, out):
"Must be overriden by sub classes."
pass
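
writeFile is the piece that lets a writer emit side files, such as images for the new HTML output, next to the story itself: under zip_output the data is added to self.zipout below the story's internal directory, otherwise it is written beneath the output file's directory, creating a single directory level on demand (os.mkdir, matching the 2.5.2 workaround above, so nested missing levels are not handled). A sketch of a subclass using it; the class and filenames are hypothetical:

    class ExampleWriter(BaseStoryWriter):        # illustration only
        def writeStoryImpl(self, out):
            self._write(out, '<html>...story text...</html>')
            imagebytes = 'fake-image-bytes'      # stand-in for fetched data
            # zip_output=true:  zip entry '<zip base dir>/images/cover.jpg'
            # zip_output=false: file '<output dir>/images/cover.jpg'
            self.writeFile('images/cover.jpg', imagebytes)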
diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index 4f92643f..c3015e2e 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -29,6 +29,8 @@ from xml.dom.minidom import parse, parseString, getDOMImplementation
from base_writer import *
from ..htmlcleanup import stripHTML
+logger = logging.getLogger(__name__)
+
class EpubWriter(BaseStoryWriter):
@staticmethod
@@ -151,8 +153,16 @@ ${value}
Update Log
''')
+ self.EPUB_LOG_UPDATE_START = string.Template('''
+
''')
self.EPUB_LOG_PAGE_END = string.Template('''
@@ -160,30 +170,50 @@ ${value}