diff --git a/app.yaml b/app.yaml index a800ef44..c38a2609 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-27 +version: 4-4-29 runtime: python27 api_version: 1 threadsafe: true diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 98649e62..ea81d698 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 6, 11) + version = (1, 6, 14) minimum_calibre_version = (0, 8, 57) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 4980d357..1d334712 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -496,7 +496,10 @@ make_firstimage_cover:true book['author_sort'] = book['author'] = story.getList("author", removeallentities=True) book['publisher'] = story.getMetadata("site") book['tags'] = story.getSubjectTags(removeallentities=True) - book['comments'] = sanitize_comments_html(story.getMetadata("description")) + if story.getMetadata("description"): + book['comments'] = sanitize_comments_html(story.getMetadata("description")) + else: + book['comments']='' book['series'] = story.getMetadata("series", removeallentities=True) # adapter.opener is the element with a threadlock. But del @@ -538,7 +541,7 @@ make_firstimage_cover:true print("from URL(%s)"%url) # try to find by identifier url first. 
- searchstr = 'identifiers:"=url:%s"'%url.replace(":","|") + searchstr = 'identifiers:"=url:=%s"'%url.replace(":","|") identicalbooks = db.search_getting_ids(searchstr, None) if len(identicalbooks) < 1: # find dups @@ -634,9 +637,12 @@ make_firstimage_cover:true if book['good']: # there shouldn't be any !'good' books at this point. # if still 'good', make a temp file to write the output to. - tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'], - suffix='.'+options['fileform'], - dir=options['tdir']) + # For HTML format users, make the filename inside the zip something reasonable. + # For crazy long titles/authors, limit it to 200chars. + # For weird/OS-unsafe characters, use file safe only. + tmp = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100], + suffix='.'+options['fileform'], + dir=options['tdir']) print("title:"+book['title']) print("outfile:"+tmp.name) book['outfile'] = tmp.name diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 2fad2e10..89d390f4 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -14,7 +14,6 @@ from StringIO import StringIO from calibre.utils.ipc.server import Server from calibre.utils.ipc.job import ParallelJob -from calibre.utils.logging import Log from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY) @@ -109,9 +108,6 @@ def do_download_for_worker(book,options): when run as a worker job ''' try: - # import logging - # logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") - book['comment'] = 'Download started...' configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform']) @@ -123,7 +119,7 @@ def do_download_for_worker(book,options): # images only for epub, even if the user mistakenly turned it # on else where. 
- if options['fileform'] != "epub": + if options['fileform'] not in ("epub","html"): configuration.set("overrides","include_images","false") adapter = adapters.getAdapter(configuration,book['url']) diff --git a/defaults.ini b/defaults.ini index 0ae23ba9..88c211e8 100644 --- a/defaults.ini +++ b/defaults.ini @@ -161,8 +161,10 @@ extratags: FanFiction ## for regexp details. ## Make sure to keep at least one space at the start of each line and ## to escape % to %%, if used. -## Two or three part lines. Two part effect everything. +## Two, three or five part lines. Two part effect everything. ## Three part effect only those key(s) lists. +## *Five* part lines. Effect only when trailing conditional key=>regexp matches +## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp] #replace_metadata: # genre,category=>Sci-Fi=>SF # Puella Magi Madoka Magica.* => Madoka @@ -170,7 +172,9 @@ extratags: FanFiction # Crossover: (.*)=>\1 # title=>(.*)Great(.*)=>\1Moderate\2 # .*-Centered=> - +# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers +# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural + ## Some readers don't show horizontal rule (
) tags correctly. ## This replaces them all with a centered '* * *'. (Note centering ## doesn't work on some devices either.) @@ -184,10 +188,15 @@ keep_summary_html:true ## Don't like the numbers at the start of chapter titles on some ## sites? You can use strip_chapter_numbers to strip them off. Just ## want to make them all look the same? Strip them off, then add them -## back on with add_chapter_numbers. Don't like the way it strips -## numbers or adds them back? See chapter_title_strip_pattern and -## chapter_title_add_pattern. +## back on with add_chapter_numbers:true. Only want them added back +## on for Table of Contents(toc)? Use add_chapter_numbers:toconly. +## (toconly doesn't work on mobi output.) Don't like the way it +## strips numbers or adds them back? See chapter_title_strip_pattern +## and chapter_title_add_pattern. strip_chapter_numbers:false + +## add_chapter_numbers can be true, false or toconly +## (Note number is not added when there's only one chapter.) add_chapter_numbers:false ## (Two versions of chapter_title_strip_pattern are shown below. You @@ -218,6 +227,22 @@ chapter_title_add_pattern:${index}. ${title} ## Each output format has a section that overrides [defaults] [html] +## include images from img tags in the body and summary of +## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output formats. +## include_images is *not* available in the web service in any format. +#include_images:false + +## Note that it's *highly* recommended to use zipfile output or story +## unique destination directories to avoid overwriting images. +#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext} +#zip_output: false + +## This switch prevents FFDL from doing any processing on the images. +## Usually they would be converted to jpg, resized and optionally made +## grayscale. 
+no_image_processing: true + ## output background color--only used by html and epub (and ignored in ## epub by many readers). Included below in output_css--will be ## ignored if not in output_css. @@ -261,13 +286,17 @@ zip_output: false ## mobi generated from epub by calibre will have a TOC at the end. include_tocpage: false -## include a Update Log page before the story text. If included, the -## log will be updated each time the epub is an all the metadata +## include a Update Log page before the story text. If 'true', the +## log will be updated each time the epub is and all the metadata ## fields that have changed since the last update (typically ## dateUpdated,numChapters,numWords at a minimum) will be shown. ## Great for tracking when chapters came out and when the description, ## etc changed. include_logpage: false +## If set to 'smart', logpage will only be included if the story is +## status:In-Progress or already had a logpage. That way you don't +## end up with Completed stories that have just one logpage entry. +#include_logpage: smart ## items to include in the log page Empty metadata entries, or those ## that haven't changed since the last update, will *not* appear, even @@ -320,6 +349,8 @@ output_css: ## include images from img tags in the body and summary of ## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output format. +## include_images is *not* available in the web service in any format. #include_images:false ## If set, the first image found will be made the cover image. If @@ -374,31 +405,31 @@ nook_img_fix:true ## URLs like: http://test1.com?sid=12345 [test1.com] extratags: FanFiction,Testing -extracategories:Fafner -extragenres:Romance,Fluff -extracharacters:Reginald Smythe-Smythe,Mokona,Harry P. -extraships:Smythe-Smythe/Mokona -extrawarnings:Extreme Bogosity +# extracategories:Fafner +# extragenres:Romance,Fluff +# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P. 
+# extraships:Smythe-Smythe/Mokona +# extrawarnings:Extreme Bogosity -extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL -include_in_compositeJ:dateCreated -include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ -include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated, +# include_in_compositeJ:dateCreated +# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ +# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated, -extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL -extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL -extra_subject_tags: metaA,metaB,metaC +# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_subject_tags: metaA,metaB,metaC -replace_metadata: - compositeL=>Val=>VALUE - series,extratags=>Test=>Plan - Puella Magi Madoka Magica.* => Madoka - Comedy=>Humor - Crossover: (.*)=>\1 - (.*)Great(.*)=>\1Moderate\2 - .*-Centered=> - characters=>Harry P\.=>Harry Potter +# replace_metadata: +# compositeL=>Val=>VALUE +# series,extratags=>Test=>Plan +# Puella Magi Madoka Magica.* => Madoka +# Comedy=>Humor +# Crossover: (.*)=>\1 +# (.*)Great(.*)=>\1Moderate\2 +# .*-Centered=> +# characters=>Harry P\.=>Harry Potter ## If necessary, you can define [:] sections to @@ -556,9 +587,8 @@ cliches_label:Character Cliches ## specific entries to titlepage/logpage without having to copy the ## entire titlepage_entries line. (But if you want them higher than ## the end, you will need to copy titlepage_entries.) -#extra_titlepage_entries: themes,hermiones,dracos,timeline,cliches - -## adds to include_subject_tags instead of replacing it. 
+#extra_titlepage_entries: themes,timeline,cliches +#extra_logpage_entries: themes,timeline,cliches #extra_subject_tags: themes,timeline,cliches [erosnsappho.sycophanthex.com] @@ -1019,6 +1049,15 @@ extracategories:Harry Potter ## Site dedicated to these categories/characters/ships extracategories:Prison Break +[www.qaf-fic.com] +## Site dedicated to these categories/characters/ships +extracategories:Queer as Folk + +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + [www.scarvesandcoffee.net] ## Site dedicated to these categories/characters/ships extracategories:Glee diff --git a/downloader.py b/downloader.py index c809d191..ca7dc3f6 100644 --- a/downloader.py +++ b/downloader.py @@ -15,8 +15,6 @@ # limitations under the License. # -import logging - import sys, os from os.path import normpath, expanduser, isfile, join from StringIO import StringIO @@ -26,6 +24,14 @@ import string import ConfigParser from subprocess import call +import logging +if sys.version_info >= (2, 7): + # suppresses default logger. Logging is set up in fanficdownloader/__init__.py so it works in calibre, too. 
+ rootlogger = logging.getLogger() + loghandler=logging.NullHandler() + loghandler.setFormatter(logging.Formatter("(=====)%(levelname)s:%(message)s")) + rootlogger.addHandler(loghandler) + from fanficdownloader import adapters,writers,exceptions from fanficdownloader.configurable import Configuration from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data @@ -79,11 +85,9 @@ def main(): (options, args) = parser.parse_args() - if options.debug: - logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") - else: - logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") - + if not options.debug: + logger = logging.getLogger("fanficdownloader") + logger.setLevel(logging.INFO) if len(args) != 1: parser.error("incorrect number of arguments") @@ -120,8 +124,6 @@ def main(): logging.debug('reading %s config file(s), if present'%conflist) configuration.read(conflist) - print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags")) - try: configuration.add_section("overrides") except ConfigParser.DuplicateSectionError: @@ -138,7 +140,7 @@ def main(): # images only for epub, even if the user mistakenly turned it # on else where. 
- if options.format != "epub": + if options.format not in ("epub","html"): configuration.set("overrides","include_images","false") if options.options: diff --git a/fanficdownloader/__init__.py b/fanficdownloader/__init__.py index 40a96afc..93a4d070 100644 --- a/fanficdownloader/__init__.py +++ b/fanficdownloader/__init__.py @@ -1 +1,16 @@ # -*- coding: utf-8 -*- + +try: + # just a way to switch between web service and CLI/PI + import google.appengine.api +except: + import sys + if sys.version_info >= (2, 7): + import logging + logger = logging.getLogger(__name__) + loghandler=logging.StreamHandler() + loghandler.setFormatter(logging.Formatter("FFDL:%(levelname)s:%(filename)s(%(lineno)d):%(message)s")) + logger.addHandler(loghandler) + loghandler.setLevel(logging.DEBUG) + logger.setLevel(logging.DEBUG) + diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index f30264c8..ac426868 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -20,6 +20,8 @@ from os.path import dirname, basename, normpath import logging import urlparse as up +logger = logging.getLogger(__name__) + from .. import exceptions as exceptions ## must import each adapter here. 
@@ -103,6 +105,7 @@ import adapter_bloodtiesfancom import adapter_indeathnet import adapter_dwiggiecom import adapter_jlaunlimitedcom +import adapter_qafficcom ## This bit of complexity allows adapters to be added by just adding @@ -124,9 +127,9 @@ for x in imports(): def getAdapter(config,url): - logging.debug("trying url:"+url) + logger.debug("trying url:"+url) (cls,fixedurl) = getClassFor(url) - logging.debug("fixedurl:"+fixedurl) + logger.debug("fixedurl:"+fixedurl) if cls: adapter = cls(config,fixedurl) # raises InvalidStoryURL return adapter @@ -164,11 +167,11 @@ def getClassFor(url): cls = getClassFromList(domain) if not cls and domain.startswith("www."): domain = domain.replace("www.","") - logging.debug("trying site:without www: "+domain) + logger.debug("trying site:without www: "+domain) cls = getClassFromList(domain) fixedurl = fixedurl.replace("http://www.","http://") if not cls: - logging.debug("trying site:www."+domain) + logger.debug("trying site:www."+domain) cls = getClassFromList("www."+domain) fixedurl = fixedurl.replace("http://","http://www.") diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py index 3aee4133..aacd7838 100644 --- a/fanficdownloader/adapters/adapter_adastrafanficcom.py +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -41,7 +42,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -65,7 +66,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): addurl="" url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -204,7 +205,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) # problems with some stories, but only in calibre. I suspect diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py index 95a481f1..27628808 100644 --- a/fanficdownloader/adapters/adapter_archiveofourownorg.py +++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -30,6 +31,8 @@ def getClass(): return ArchiveOfOurOwnOrgAdapter +logger = logging.getLogger(__name__) + class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): def __init__(self, config, url): @@ -48,13 +51,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # get storyId from url--url validation guarantees query correct m = re.match(self.getSiteURLPattern(),url) if m: self.story.setMetadata('storyId',m.group('id')) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId')) else: @@ -104,14 +107,14 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('" />')[0] loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions' - logging.info("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.info("Will now login to URL (%s) as (%s)" % (loginUrl, params['user_session[login]'])) d = self._postUrl(loginUrl, params) - #logging.info(d) + #logger.info(d) if "Successfully logged in" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['user_session[login]'])) raise exceptions.FailedToLogin(url,params['user_session[login]']) return False @@ -128,8 +131,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): metaurl = self.url+addurl url = self.url+'/navigate'+addurl - logging.info("url: "+url) - logging.info("metaurl: "+metaurl) + logger.info("url: "+url) + logger.info("metaurl: "+metaurl) try: data = self._fetchUrl(url) @@ -164,7 +167,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+")) if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link. 
self.story.setMetadata('author','Anonymous') - self.story.setMetadata('authorUrl',self.url) + self.story.setMetadata('authorUrl','http://archiveofourown.org/') + self.story.setMetadata('authorId','0') else: for a in alist: self.story.addToList('authorId',a['href'].split('/')[2]) @@ -174,7 +178,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): # Find the chapters: chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$")) self.story.setMetadata('numChapters',len(chapters)) - logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) for x in range(0,len(chapters)): # just in case there's tags, like in chapter titles. chapter=chapters[x] @@ -291,7 +295,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) chapter=bs.BeautifulSoup('
') data = self._fetchUrl(url) diff --git a/fanficdownloader/adapters/adapter_archiveskyehawkecom.py b/fanficdownloader/adapters/adapter_archiveskyehawkecom.py index 999d53b4..437ec083 100644 --- a/fanficdownloader/adapters/adapter_archiveskyehawkecom.py +++ b/fanficdownloader/adapters/adapter_archiveskyehawkecom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -48,7 +49,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId')) @@ -78,7 +79,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -151,7 +152,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): rating.find('br').replaceWith('split') rating=rating.text.replace("This story is rated",'').split('split')[0] self.story.setMetadata('rating',rating) - logging.debug(self.story.getMetadata('rating')) + logger.debug(self.story.getMetadata('rating')) warnings=box.find('ol') if warnings != None: @@ -177,7 +178,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py index 01d1c673..cf3492f8 100644 --- a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -94,13 +95,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Logout" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -112,7 +113,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -237,7 +238,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) diff --git a/fanficdownloader/adapters/adapter_bloodtiesfancom.py b/fanficdownloader/adapters/adapter_bloodtiesfancom.py index 993a4b65..98c32ccd 100644 --- a/fanficdownloader/adapters/adapter_bloodtiesfancom.py +++ b/fanficdownloader/adapters/adapter_bloodtiesfancom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -69,7 +70,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. @@ -115,13 +116,13 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -148,7 +149,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. 
url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -182,7 +183,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -320,7 +321,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_castlefansorg.py b/fanficdownloader/adapters/adapter_castlefansorg.py index 1985f4cd..9345d249 100644 --- a/fanficdownloader/adapters/adapter_castlefansorg.py +++ b/fanficdownloader/adapters/adapter_castlefansorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -71,7 +72,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. 
@@ -117,13 +118,13 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/fanfic/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -145,7 +146,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -293,7 +294,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py index f9f6f50a..2436630c 100644 --- a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -86,7 +87,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -221,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_checkmatedcom.py b/fanficdownloader/adapters/adapter_checkmatedcom.py index c529f0bd..4d534dac 100644 --- a/fanficdownloader/adapters/adapter_checkmatedcom.py +++ b/fanficdownloader/adapters/adapter_checkmatedcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class CheckmatedComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) self._setURL('http://' + self.getSiteDomain() + '/story.php?story='+self.story.getMetadata('storyId')) @@ -95,7 +96,7 @@ class CheckmatedComAdapter(BaseSiteAdapter): e = self._fetchUrl(url) if "Welcome back," not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['name'])) raise exceptions.FailedToLogin(url,params['name']) return False @@ -112,7 +113,7 @@ class CheckmatedComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -222,7 +223,7 @@ class CheckmatedComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_darksolaceorg.py b/fanficdownloader/adapters/adapter_darksolaceorg.py index 9ece865b..f58f180f 100644 --- a/fanficdownloader/adapters/adapter_darksolaceorg.py +++ b/fanficdownloader/adapters/adapter_darksolaceorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -98,13 +99,13 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/elysian/user.php' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "User Account Page" not in d : #Member Account - logging.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname'])) + logger.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False else: @@ -115,7 +116,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -261,7 +262,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_destinysgatewaycom.py b/fanficdownloader/adapters/adapter_destinysgatewaycom.py index f4757a95..f581f50c 100644 --- a/fanficdownloader/adapters/adapter_destinysgatewaycom.py +++ b/fanficdownloader/adapters/adapter_destinysgatewaycom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -86,7 +87,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -106,7 +107,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter): # correct stupid &amp; error in url. addurl = addurl.replace("&amp;","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -227,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_dokugacom.py b/fanficdownloader/adapters/adapter_dokugacom.py index 23efabf3..5225683d 100644 --- a/fanficdownloader/adapters/adapter_dokugacom.py +++ b/fanficdownloader/adapters/adapter_dokugacom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class DokugaComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # www.dokuga.com has two 'sections', shown in URL as # 'fanfiction' and 'spark' that change how things should be @@ -87,7 +88,7 @@ class DokugaComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -224,7 +225,7 @@ class DokugaComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_dracoandginnycom.py b/fanficdownloader/adapters/adapter_dracoandginnycom.py index 6d825223..ffc74570 100644 --- a/fanficdownloader/adapters/adapter_dracoandginnycom.py +++ b/fanficdownloader/adapters/adapter_dracoandginnycom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,13 +93,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -120,7 +121,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -145,7 +146,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter): # correct stupid &amp; error in url. 
addurl = addurl.replace("&amp;","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -282,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_dramioneorg.py b/fanficdownloader/adapters/adapter_dramioneorg.py index 6ca73b2b..0b8f6f0b 100644 --- a/fanficdownloader/adapters/adapter_dramioneorg.py +++ b/fanficdownloader/adapters/adapter_dramioneorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class DramioneOrgAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,13 +93,13 @@ class DramioneOrgAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -120,7 +121,7 @@ class DramioneOrgAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -283,7 +284,7 @@ class DramioneOrgAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py index 6ff8fff8..80cee30d 100644 --- a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -86,7 +87,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -106,7 +107,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): # correct stupid &amp; error in url. addurl = addurl.replace("&amp;","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -239,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index a4bb1211..62155e17 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 from urllib import unquote_plus @@ -75,12 +76,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # metadata and chapter list url = self.origurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) # use BeautifulSoup HTML parser to make everything easier to find. try: data = self._fetchUrl(url) - #print("\n===================\n%s\n===================\n"%data) + #logger.debug("\n===================\n%s\n===================\n"%data) soup = bs.BeautifulSoup(data) except urllib2.HTTPError, e: if e.code == 404: @@ -108,11 +109,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): tryurl = "http://%s/s/%s/%d/"%(self.getSiteDomain(), self.story.getMetadata('storyId'), chapcount+1) - print('=Trying newer chapter: %s' % tryurl) + logger.debug('=Trying newer chapter: %s' % tryurl) newdata = self._fetchUrl(tryurl) if "not found. Please check to see you are not using an outdated url." \ not in newdata: - print('=======Found newer chapter: %s' % tryurl) + logger.debug('=======Found newer chapter: %s' % tryurl) soup = bs.BeautifulSoup(newdata) except: pass @@ -160,7 +161,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): metatext = stripHTML(gui_table1i.find('div', {'style':'color:gray;'})).replace('Hurt/Comfort','Hurt-Comfort') metalist = metatext.split(" - ") - #print("metatext:(%s)"%metalist) + #logger.debug("metatext:(%s)"%metalist) # Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11 @@ -176,9 +177,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): genrelist = metalist[0].split('/') # Hurt/Comfort already changed above. 
goodgenres=True for g in genrelist: - print("g:(%s)"%g) + #logger.debug("g:(%s)"%g) if g.strip() not in ffnetgenres: - print("g not in ffnetgenres") + logger.info("g not in ffnetgenres") goodgenres=False if goodgenres: self.story.extendList('genre',genrelist) @@ -240,7 +241,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): return def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) time.sleep(0.5) ## ffnet(and, I assume, fpcom) tends to fail ## more if hit too fast. This is in ## additional to what ever the @@ -265,7 +266,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): div = soup.find('div', {'id' : 'storytextp'}) if None == div: - logging.debug('div id=storytextp not found. data:%s'%data) + logger.debug('div id=storytextp not found. data:%s'%data) raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_fanfiktionde.py b/fanficdownloader/adapters/adapter_fanfiktionde.py index 1130f8f9..28d594af 100644 --- a/fanficdownloader/adapters/adapter_fanfiktionde.py +++ b/fanficdownloader/adapters/adapter_fanfiktionde.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -48,7 +49,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1') @@ -94,12 +95,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter): params['submit'] = 'Login...' 
loginUrl = 'https://ssl.fanfiktion.de/' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['nickname'])) d = self._postUrl(loginUrl,params) if "Login erfolgreich" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['nickname'])) raise exceptions.FailedToLogin(url,params['nickname']) return False @@ -110,7 +111,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -183,7 +184,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) time.sleep(0.5) ## ffde has "floodlock" protection soup = bs.BeautifulSoup(self._fetchUrl(url), diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py index de6ea953..ae707706 100644 --- a/fanficdownloader/adapters/adapter_ficbooknet.py +++ b/fanficdownloader/adapters/adapter_ficbooknet.py @@ -18,6 +18,7 @@ import time import datetime import logging +logger = logging.getLogger(__name__) import re import urllib2 from .. import translit @@ -33,6 +34,8 @@ def getClass(): return FicBookNetAdapter +logger = logging.getLogger(__name__) + class FicBookNetAdapter(BaseSiteAdapter): def __init__(self, config, url): @@ -49,7 +52,7 @@ class FicBookNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId')) @@ -75,7 +78,7 @@ class FicBookNetAdapter(BaseSiteAdapter): ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): url=self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) except urllib2.HTTPError, e: @@ -95,14 +98,14 @@ class FicBookNetAdapter(BaseSiteAdapter): ## Title a = soup.find('h1') self.story.setMetadata('title',stripHTML(a)) - logging.debug("Title: (%s)"%self.story.getMetadata('title')) + logger.debug("Title: (%s)"%self.story.getMetadata('title')) # Find authorid and URL from... author url. a = table.find('a') self.story.setMetadata('authorId',a.text) # Author's name is unique self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.text) - logging.debug("Author: (%s)"%self.story.getMetadata('author')) + logger.debug("Author: (%s)"%self.story.getMetadata('author')) # Find the chapters: chapters = soup.find('div', {'class' : 'part_list'}) @@ -123,7 +126,7 @@ class FicBookNetAdapter(BaseSiteAdapter): pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span'))) update=pubdate - logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) if not ',' in pubdate: pubdate=datetime.date.today().strftime(self.dateformat) @@ -207,7 +210,7 @@ class FicBookNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py index d87a7994..2e671d4b 100644 --- a/fanficdownloader/adapters/adapter_fictionalleyorg.py +++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -44,7 +45,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): if m: self.story.setMetadata('authorId',m.group('auth')) self.story.setMetadata('storyId',m.group('id')) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL(url) else: @@ -68,7 +69,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): if self.is_adult or self.getConfig("is_adult"): params={'iamold':'Yes', 'action':'ageanswer'} - logging.info("Attempting to get cookie for %s" % url) + logger.info("Attempting to get cookie for %s" % url) ## posting on list doesn't work, but doesn't hurt, either. data = self._postUrl(url,params) else: @@ -79,7 +80,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): ## could be either chapter list page or one-shot text page. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._postFetchWithIAmOld(url) @@ -107,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1] self._setURL(storya['href']) url=self.url - logging.debug("Normalizing to URL: "+url) + logger.debug("Normalizing to URL: "+url) ## title's right there... 
self.story.setMetadata('title',storya.string) data = self._fetchUrl(url) @@ -193,7 +194,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) # find & and diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py index 76b2353a..a959b4a8 100644 --- a/fanficdownloader/adapters/adapter_fictionpresscom.py +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 import time diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index 964fc31d..3183d542 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 import time @@ -61,12 +62,12 @@ class FicwadComSiteAdapter(BaseSiteAdapter): params['password'] = self.getConfig("password") loginUrl = 'http://' + self.getSiteDomain() + '/account/login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['username'])) d = self._postUrl(loginUrl,params) if "Login attempt failed..." in d: - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['username'])) raise exceptions.FailedToLogin(url,params['username']) return False @@ -79,7 +80,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter): # metadata and chapter list url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) # use BeautifulSoup HTML parser to make everything easier to find. 
try: @@ -96,7 +97,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter): # normalize story URL on chapter list. self.story.setMetadata('storyId',storya['href'].split('/',)[2]) url = "http://"+self.getSiteDomain()+storya['href'] - logging.debug("Normalizing to URL: "+url) + logger.debug("Normalizing to URL: "+url) self._setURL(url) try: soup = bs.BeautifulSoup(self._fetchUrl(url)) @@ -201,7 +202,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py index d638321f..cf7a22aa 100644 --- a/fanficdownloader/adapters/adapter_fimfictionnet.py +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 import cookielib as cl @@ -175,7 +176,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'}) if soup == None: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) diff --git a/fanficdownloader/adapters/adapter_finestoriescom.py b/fanficdownloader/adapters/adapter_finestoriescom.py index 49357c9f..efdda349 100644 --- a/fanficdownloader/adapters/adapter_finestoriescom.py +++ b/fanficdownloader/adapters/adapter_finestoriescom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -49,7 +50,7 @@ class FineStoriesComAdapter(BaseSiteAdapter): self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0]) if 'storyInfo' in self.story.getMetadata('storyId'): self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/s/storyInfo.php?id='+self.story.getMetadata('storyId')) @@ -95,13 +96,13 @@ class FineStoriesComAdapter(BaseSiteAdapter): params['submit'] = 'Login' loginUrl = 'http://' + self.getSiteDomain() + '/login.php' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['theusername'])) d = self._fetchUrl(loginUrl, params) if "My Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['theusername'])) raise exceptions.FailedToLogin(url,params['theusername']) return False @@ -114,7 +115,7 @@ class FineStoriesComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -232,7 +233,7 @@ class FineStoriesComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_grangerenchantedcom.py b/fanficdownloader/adapters/adapter_grangerenchantedcom.py index 7df58bf8..dd4723fe 100644 --- a/fanficdownloader/adapters/adapter_grangerenchantedcom.py +++ b/fanficdownloader/adapters/adapter_grangerenchantedcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,8 +48,8 @@ class GrangerEnchantedCom(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) - self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1]) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + self.section=self.parsedUrl.path.split('/',)[1] # normalized story URL. 
if "malfoymanor" in self.parsedUrl.netloc: @@ -100,17 +101,17 @@ class GrangerEnchantedCom(BaseSiteAdapter): params['cookiecheck'] = '1' params['submit'] = 'Submit' - if "enchant" in self.story.getMetadata('section'): + if "enchant" in self.section: loginUrl = 'http://grangerenchanted.com/enchant/user.php?action=login' else: loginUrl = 'http://malfoymanor.grangerenchanted.com/themanor/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -132,7 +133,7 @@ class GrangerEnchantedCom(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -157,7 +158,7 @@ class GrangerEnchantedCom(BaseSiteAdapter): # correct stupid &amp; error in url. addurl = addurl.replace("&amp;","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -191,7 +192,7 @@ class GrangerEnchantedCom(BaseSiteAdapter): # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. 
- self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -262,7 +263,7 @@ class GrangerEnchantedCom(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+a['href'] + series_url = 'http://'+self.host+'/'+self.section+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) @@ -284,10 +285,10 @@ class GrangerEnchantedCom(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) - soup = bs.BeautifulStoneSoup(self._fetchUrl(url), - selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
div = soup.find('div', {'id' : 'story1'}) diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py index 8c5ad0a7..4a70b3e8 100644 --- a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -41,7 +42,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only psid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId')) @@ -72,7 +73,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -181,7 +182,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) ## most adapters use BeautifulStoneSoup here, but non-Stone ## allows nested div tags. 
diff --git a/fanficdownloader/adapters/adapter_hlfictionnet.py b/fanficdownloader/adapters/adapter_hlfictionnet.py index c3203ee2..48c0cd08 100644 --- a/fanficdownloader/adapters/adapter_hlfictionnet.py +++ b/fanficdownloader/adapters/adapter_hlfictionnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -76,7 +77,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -215,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_hpfandomnet.py b/fanficdownloader/adapters/adapter_hpfandomnet.py index dfc7c055..3bce123e 100644 --- a/fanficdownloader/adapters/adapter_hpfandomnet.py +++ b/fanficdownloader/adapters/adapter_hpfandomnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -51,7 +52,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /eff part. Replace all to remove it usually. @@ -79,7 +80,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -180,21 +181,22 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX value = td.nextSibling.string #print("\nlabel:%s\nvalue:%s\n"%(label,value)) - if 'Category' in label: + if 'Category' in label and value: cats = td.parent.findAll('a',href=re.compile(r'categories.php')) catstext = [cat.string for cat in cats] for cat in catstext: self.story.addToList('category',cat.string) - if 'Characters' in label: + if 'Characters' in label and value: # this site can have Character label with no + # values, apparently. Others as a precaution. 
for char in value.split(','): self.story.addToList('characters',char.strip()) - if 'Genre' in label: + if 'Genre' in label and value: for genre in value.split(','): self.story.addToList('genre',genre.strip()) - if 'Warnings' in label: + if 'Warnings' in label and value: for warning in value.split(','): if warning.strip() != 'none': self.story.addToList('warnings',warning.strip()) @@ -208,7 +210,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) # There's no good wrapper around the chapter text. :-/ diff --git a/fanficdownloader/adapters/adapter_hpfanficarchivecom.py b/fanficdownloader/adapters/adapter_hpfanficarchivecom.py index 9e078c72..1c263bd6 100644 --- a/fanficdownloader/adapters/adapter_hpfanficarchivecom.py +++ b/fanficdownloader/adapters/adapter_hpfanficarchivecom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -76,7 +77,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -202,7 +203,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), diff --git a/fanficdownloader/adapters/adapter_iketernalnet.py b/fanficdownloader/adapters/adapter_iketernalnet.py index 2b673d7d..91aaa4b4 100644 --- a/fanficdownloader/adapters/adapter_iketernalnet.py +++ b/fanficdownloader/adapters/adapter_iketernalnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class IkEternalNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,13 +93,13 @@ class IkEternalNetAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -120,7 +121,7 @@ class IkEternalNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. 
url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -158,7 +159,7 @@ class IkEternalNetAdapter(BaseSiteAdapter): # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -267,7 +268,7 @@ class IkEternalNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_ksarchivecom.py b/fanficdownloader/adapters/adapter_ksarchivecom.py index 11b9b96e..ff979fbe 100644 --- a/fanficdownloader/adapters/adapter_ksarchivecom.py +++ b/fanficdownloader/adapters/adapter_ksarchivecom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -51,7 +52,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. @@ -99,7 +100,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -132,7 +133,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -294,7 +295,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) soup = bs.BeautifulStoneSoup(data, diff --git a/fanficdownloader/adapters/adapter_libraryofmoriacom.py b/fanficdownloader/adapters/adapter_libraryofmoriacom.py index d6851a85..68c3c3af 100644 --- a/fanficdownloader/adapters/adapter_libraryofmoriacom.py +++ b/fanficdownloader/adapters/adapter_libraryofmoriacom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -48,7 +49,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -84,7 +85,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -104,7 +105,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter): # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -234,7 +235,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py index 878a73ac..dd0b0d09 100644 --- a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -86,7 +87,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -221,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py index a77b56ca..c647e90b 100644 --- a/fanficdownloader/adapters/adapter_mediaminerorg.py +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -42,7 +43,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): m = re.match(self.getSiteURLPattern(),url) if m: self.story.setMetadata('storyId',m.group('id')) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId')) else: @@ -66,7 +67,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -188,7 +189,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data=self._fetchUrl(url) soup = bs.BeautifulStoneSoup(data, @@ -214,7 +215,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): return self.utf8FromSoup(url,anchor) else: - logging.debug('Using kludgey text find for older mediaminer story.') + logger.debug('Using kludgey text find for older mediaminer story.') ## Some older mediaminer stories are unparsable with BeautifulSoup. ## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first. 
## Story stuff falls between: diff --git a/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py b/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py index 6ad4904e..98fd608e 100644 --- a/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py +++ b/fanficdownloader/adapters/adapter_merlinficdtwinscouk.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,13 +93,13 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -120,7 +121,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -145,7 +146,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter): # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -277,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_midnightwhispersca.py b/fanficdownloader/adapters/adapter_midnightwhispersca.py index cbdce553..2a9a8f60 100644 --- a/fanficdownloader/adapters/adapter_midnightwhispersca.py +++ b/fanficdownloader/adapters/adapter_midnightwhispersca.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -51,7 +52,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. @@ -95,7 +96,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -128,7 +129,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -269,7 +270,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) soup = bs.BeautifulStoneSoup(data, diff --git a/fanficdownloader/adapters/adapter_mugglenetcom.py b/fanficdownloader/adapters/adapter_mugglenetcom.py index 26c468f7..dd171281 100644 --- a/fanficdownloader/adapters/adapter_mugglenetcom.py +++ b/fanficdownloader/adapters/adapter_mugglenetcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -51,7 +52,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -96,13 +97,13 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login&sid='+self.story.getMetadata('storyId') - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -125,7 +126,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -164,7 +165,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -315,7 +316,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_nationallibrarynet.py b/fanficdownloader/adapters/adapter_nationallibrarynet.py index 530da234..00303eb5 100644 --- a/fanficdownloader/adapters/adapter_nationallibrarynet.py +++ b/fanficdownloader/adapters/adapter_nationallibrarynet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only storyid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId')) @@ -80,7 +81,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -177,7 +178,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_ncisficcom.py b/fanficdownloader/adapters/adapter_ncisficcom.py index e0fcb3dd..744ef4f4 100644 --- a/fanficdownloader/adapters/adapter_ncisficcom.py +++ b/fanficdownloader/adapters/adapter_ncisficcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class NCISFicComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only storyid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId')) @@ -80,7 +81,7 @@ class NCISFicComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -184,7 +185,7 @@ class NCISFicComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_ncisfictioncom.py b/fanficdownloader/adapters/adapter_ncisfictioncom.py index 6099bada..fde27352 100644 --- a/fanficdownloader/adapters/adapter_ncisfictioncom.py +++ b/fanficdownloader/adapters/adapter_ncisfictioncom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class NCISFictionComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL("http://"+self.getSiteDomain()\ @@ -78,7 +79,7 @@ class NCISFictionComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -187,7 +188,7 @@ class NCISFictionComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_nfacommunitycom.py b/fanficdownloader/adapters/adapter_nfacommunitycom.py index 1c497740..8c4a275f 100644 --- a/fanficdownloader/adapters/adapter_nfacommunitycom.py +++ b/fanficdownloader/adapters/adapter_nfacommunitycom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -51,7 +52,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. @@ -99,7 +100,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -132,7 +133,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -273,7 +274,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_nhamagicalworldsus.py b/fanficdownloader/adapters/adapter_nhamagicalworldsus.py index 6f4aea46..8eea2201 100644 --- a/fanficdownloader/adapters/adapter_nhamagicalworldsus.py +++ b/fanficdownloader/adapters/adapter_nhamagicalworldsus.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -74,7 +75,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter): ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -197,7 +198,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) diff --git a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py index ea1119f1..99d4cfd8 100644 --- a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. @@ -94,13 +95,13 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Logout" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -112,7 +113,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -245,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) data = data.replace('
') diff --git a/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py b/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py index cc57de5a..736f992e 100644 --- a/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_onedirectionfanfictioncom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -97,13 +98,13 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -125,7 +126,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -253,7 +254,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_phoenixsongnet.py b/fanficdownloader/adapters/adapter_phoenixsongnet.py index 00f1a5de..cd1cf012 100644 --- a/fanficdownloader/adapters/adapter_phoenixsongnet.py +++ b/fanficdownloader/adapters/adapter_phoenixsongnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2, urllib, cookielib @@ -47,7 +48,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/') @@ -90,12 +91,12 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): params['login'] = 'Login' loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['txtusername'])) d = self._fetchUrl(loginUrl, params) if 'Please login to continue.' in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['txtusername'])) raise exceptions.FailedToLogin(url,params['txtusername']) return False @@ -108,7 +109,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. 
url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -208,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py b/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py index c40fb2ba..ee54a835 100644 --- a/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py +++ b/fanficdownloader/adapters/adapter_ponyfictionarchivenet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,8 +48,8 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) - logging.info(self.parsedUrl.netloc) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.info(self.parsedUrl.netloc) # normalized story URL. if "explicit" in self.parsedUrl.netloc: self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -91,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -112,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter): # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -234,7 +235,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_portkeyorg.py b/fanficdownloader/adapters/adapter_portkeyorg.py index b14a101b..75a5b8e2 100644 --- a/fanficdownloader/adapters/adapter_portkeyorg.py +++ b/fanficdownloader/adapters/adapter_portkeyorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 import cookielib as cl @@ -54,7 +55,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/story/'+self.story.getMetadata('storyId')) @@ -81,7 +82,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) # portkey screws around with using a different URL to set the # cookie and it's a pain. So... cheat! @@ -247,7 +248,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) soup = bs.BeautifulStoneSoup(data, selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py index a8dd85ea..377fa36e 100644 --- a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -40,7 +41,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -63,7 +64,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -191,7 +192,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_pretendercentrecom.py b/fanficdownloader/adapters/adapter_pretendercentrecom.py index 022efa1a..c5c8a5d2 100644 --- a/fanficdownloader/adapters/adapter_pretendercentrecom.py +++ b/fanficdownloader/adapters/adapter_pretendercentrecom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -89,7 +90,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -109,7 +110,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -237,7 +238,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_prisonbreakficnet.py b/fanficdownloader/adapters/adapter_prisonbreakficnet.py index 0c4f066e..e125108f 100644 --- a/fanficdownloader/adapters/adapter_prisonbreakficnet.py +++ b/fanficdownloader/adapters/adapter_prisonbreakficnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -76,7 +77,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -202,7 +203,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_qafficcom.py b/fanficdownloader/adapters/adapter_qafficcom.py new file mode 100644 index 00000000..d6b55271 --- /dev/null +++ b/fanficdownloader/adapters/adapter_qafficcom.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- + +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +logger = logging.getLogger(__name__) +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return QafFicComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class QafFicComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','atp') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%y" + + @staticmethod # must be @staticmethod, don't remove it. 
+ def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.qaf-fic.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&warning=NC-17" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logger.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\s+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. + addurl = addurl.replace("&","&") + url = self.url+addurl + logger.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title and author + a = soup.find('div', {'id' : 'pagetitle'}) + + aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/atp/'+aut['href']) + self.story.setMetadata('author',aut.string) + aut.extract() + + self.story.setMetadata('title',a.string[:(len(a.string)-3)]) + + # Find the chapters: + chapters=soup.find('select') + if chapters != None: + for chapter in chapters.findAll('option'): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + for list in asoup.findAll('div', {'class' : re.compile('listbox\s+')}): + a = list.find('a') + if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']: + break + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
etc + labels = list.findAll('span', {'class' : 'classification'}) + for labelspan in labels: + label = labelspan.string + value = labelspan.nextSibling + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'classification' and value != None: + if "Featured Stories" not in value: + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value[:len(value)-2]) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'categories.php\?catid=\d+')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + for char in value.string.split(', '): + if not 'None' in char: + self.story.addToList('characters',char) + + if 'Genre' in label: + for genre in value.string.split(', '): + if not 'None' in genre: + self.story.addToList('genre',genre) + + if 'Warnings' in label: + for warning in value.string.split(', '): + if not 'None' in warning: + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' ::')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + if list.find('a', href=re.compile(r"series.php")) != None: + for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")): + # Find Series name from series URL. 
+ series_url = 'http://'+self.host+'/atp/'+series['href'] + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + name=seriessoup.find('div', {'id' : 'pagetitle'}) + name.find('a').extract() + self.setSeries(name.text.split(' by[')[0], i) + i=0 + break + i+=1 + if i == 0: + break + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logger.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_samdeanarchivenu.py b/fanficdownloader/adapters/adapter_samdeanarchivenu.py index c79645ae..55dcf1cf 100644 --- a/fanficdownloader/adapters/adapter_samdeanarchivenu.py +++ b/fanficdownloader/adapters/adapter_samdeanarchivenu.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -80,7 +81,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -216,7 +217,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py b/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py index 9d415bb9..fc04e89b 100644 --- a/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py +++ b/fanficdownloader/adapters/adapter_scarvesandcoffeenet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -94,7 +95,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. 
url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -114,7 +115,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter): # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -231,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_sg1heliopoliscom.py b/fanficdownloader/adapters/adapter_sg1heliopoliscom.py index b067ec6d..6852eb81 100644 --- a/fanficdownloader/adapters/adapter_sg1heliopoliscom.py +++ b/fanficdownloader/adapters/adapter_sg1heliopoliscom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,11 +48,11 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) - self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1]) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + self.section=self.parsedUrl.path.split('/',)[1] # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/'+self.story.getMetadata('section')+'/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('http://' + self.getSiteDomain() + '/'+self.section+'/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','sghp') @@ -59,7 +60,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # If all stories from the site fall into the same category, # the site itself isn't likely to label them as such, so we # do. Can't use extracategories, could be Atlantis or SG-1 - if 'atlantis' in self.story.getMetadata('section'): + if 'atlantis' in self.section: self.story.addToList("category","Stargate: Atlantis") else: self.story.addToList("category","Stargate: SG-1") @@ -95,7 +96,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -115,7 +116,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -149,7 +150,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -220,7 +221,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # Find Series name from series URL. 
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+a['href'] + series_url = 'http://'+self.host+'/'+self.section+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) @@ -242,7 +243,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_sinfuldesireorg.py b/fanficdownloader/adapters/adapter_sinfuldesireorg.py index d4874082..372196c2 100644 --- a/fanficdownloader/adapters/adapter_sinfuldesireorg.py +++ b/fanficdownloader/adapters/adapter_sinfuldesireorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -85,7 +86,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -105,7 +106,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter): # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -235,7 +236,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_siyecouk.py b/fanficdownloader/adapters/adapter_siyecouk.py index dc95559b..5f9af641 100644 --- a/fanficdownloader/adapters/adapter_siyecouk.py +++ b/fanficdownloader/adapters/adapter_siyecouk.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -48,7 +49,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -82,7 +83,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX # sites skip that for one-chapter stories. # Except it doesn't this time. :-/ url = self.url #+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -224,7 +225,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) # soup = bs.BeautifulSoup(self._fetchUrl(url)) # BeautifulSoup objects to

inside , which diff --git a/fanficdownloader/adapters/adapter_squidgeorgpeja.py b/fanficdownloader/adapters/adapter_squidgeorgpeja.py index 07d86989..6416d5f9 100644 --- a/fanficdownloader/adapters/adapter_squidgeorgpeja.py +++ b/fanficdownloader/adapters/adapter_squidgeorgpeja.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -58,7 +59,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -90,7 +91,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -223,7 +224,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_stargateatlantisorg.py b/fanficdownloader/adapters/adapter_stargateatlantisorg.py index 5cc156c5..f8c52bad 100644 --- a/fanficdownloader/adapters/adapter_stargateatlantisorg.py +++ b/fanficdownloader/adapters/adapter_stargateatlantisorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -76,7 +77,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -213,7 +214,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_storiesofardacom.py b/fanficdownloader/adapters/adapter_storiesofardacom.py index 7bb999b4..ff44d84e 100644 --- a/fanficdownloader/adapters/adapter_storiesofardacom.py +++ b/fanficdownloader/adapters/adapter_storiesofardacom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId')) @@ -76,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -133,7 +134,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_svufictioncom.py b/fanficdownloader/adapters/adapter_svufictioncom.py index 387188ab..2efd863a 100644 --- a/fanficdownloader/adapters/adapter_svufictioncom.py +++ b/fanficdownloader/adapters/adapter_svufictioncom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class SVUFictionComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,13 +93,13 @@ class SVUFictionComAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -120,7 +121,7 @@ class SVUFictionComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -145,7 +146,7 @@ class SVUFictionComAdapter(BaseSiteAdapter): # correct stupid & error in url. 
addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -258,7 +259,7 @@ class SVUFictionComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py index 6203aa95..b3f7a078 100644 --- a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py +++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -43,7 +44,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -81,13 +82,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -102,7 +103,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): addurl="" url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -116,7 +117,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): # need to log in for this one. addurl = "&ageconsent=ok&warning=4" url = self.url+'&index=1'+addurl - logging.debug("Changing URL: "+url) + logger.debug("Changing URL: "+url) self.performLogin(url) data = self._fetchUrl(url) @@ -229,7 +230,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 222ffb5f..3fdc0bec 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -18,6 +18,7 @@ import datetime import time import logging +logger = logging.getLogger(__name__) from .. import BeautifulSoup as bs from .. 
import exceptions @@ -48,7 +49,7 @@ class TestSiteAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")): - logging.warn("self.is_adult:%s"%self.is_adult) + logger.warn("self.is_adult:%s"%self.is_adult) raise exceptions.AdultCheckRequired(self.url) if self.story.getMetadata('storyId') == '666': @@ -128,7 +129,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" self.story.addToList('genre','Fantasy') self.story.addToList('genre','Comedy') - self.story.addToList('genre','SF') + self.story.addToList('genre','Sci-Fi') self.story.addToList('genre','Noir') self.story.addToList('characters','Bob Smith') @@ -184,7 +185,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) if self.story.getMetadata('storyId') == '667': raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) diff --git a/fanficdownloader/adapters/adapter_thealphagatecom.py b/fanficdownloader/adapters/adapter_thealphagatecom.py index 98ec91d4..ff6ee8a8 100644 --- a/fanficdownloader/adapters/adapter_thealphagatecom.py +++ b/fanficdownloader/adapters/adapter_thealphagatecom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -76,7 +77,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -198,7 +199,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_thehexfilesnet.py b/fanficdownloader/adapters/adapter_thehexfilesnet.py index bc515cb7..c191df74 100644 --- a/fanficdownloader/adapters/adapter_thehexfilesnet.py +++ b/fanficdownloader/adapters/adapter_thehexfilesnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -80,7 +81,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -182,7 +183,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_thehookupzonenet.py b/fanficdownloader/adapters/adapter_thehookupzonenet.py index 93f4b852..a43ecb1e 100644 --- a/fanficdownloader/adapters/adapter_thehookupzonenet.py +++ b/fanficdownloader/adapters/adapter_thehookupzonenet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -71,7 +72,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. @@ -117,13 +118,13 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -145,7 +146,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. 
url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -292,7 +293,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_themasquenet.py b/fanficdownloader/adapters/adapter_themasquenet.py index cc99f879..f9690103 100644 --- a/fanficdownloader/adapters/adapter_themasquenet.py +++ b/fanficdownloader/adapters/adapter_themasquenet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,20 +48,20 @@ class TheMasqueNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) if self.parsedUrl.path.split('/',)[1] == 'wiktt': self.story.addToList("category","Harry Potter") - self.story.setMetadata('section','/wiktt/efiction/') + self.section='/wiktt/efiction/' self.dateformat = "%m/%d/%Y" else: self.story.addToList("category","Originals") - self.story.setMetadata('section','/efiction/') + self.section='/efiction/' self.dateformat = "%b %d, %Y" # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('http://' + self.getSiteDomain() + self.section + 'viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','msq') @@ -98,14 +99,14 @@ class TheMasqueNetAdapter(BaseSiteAdapter): params['cookiecheck'] = '1' params['submit'] = 'Submit' - loginUrl = 'http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login' + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -127,7 +128,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -152,7 +153,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter): # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -186,7 +187,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter): # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. 
- self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.story.getMetadata('section') + chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.section + chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -258,7 +259,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py index 11c53cdc..7a7a2c17 100644 --- a/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py +++ b/fanficdownloader/adapters/adapter_thepetulantpoetesscom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1') @@ -91,13 +92,13 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "My Account Page" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -110,7 +111,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -223,7 +224,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
diff --git a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py index 2c3533d9..fc5d48e9 100644 --- a/fanficdownloader/adapters/adapter_thequidditchpitchorg.py +++ b/fanficdownloader/adapters/adapter_thequidditchpitchorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -51,7 +52,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. # XXX Most sites don't have the part. Replace all to remove it usually. @@ -101,13 +102,13 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -129,7 +130,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -272,7 +273,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py index c5542050..8ed953d7 100644 --- a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -43,7 +44,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -81,13 +82,13 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -102,7 +103,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): addurl="" url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -235,7 +236,7 @@ class 
TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) # problems with some stories, but only in calibre. I suspect diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py index 8c3047e6..8f40568a 100644 --- a/fanficdownloader/adapters/adapter_tthfanficorg.py +++ b/fanficdownloader/adapters/adapter_tthfanficorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 import time @@ -40,7 +41,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): m = re.match(self.getSiteURLPattern(),url) if m: self.story.setMetadata('storyId',m.group('id')) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL("http://"+self.getSiteDomain()\ +"/Story-"+self.story.getMetadata('storyId')) @@ -81,7 +82,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): return loginUrl = 'http://' + self.getSiteDomain() + '/login.php' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['urealname'])) ## need to pull empty login page first to get ctkn and @@ -98,7 +99,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): d = self._fetchUrl(loginUrl, params) if "Stories Published" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -110,7 +111,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): # metadata and chapter list url=self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) # tth won't send you future 
updates if you aren't 'caught up' # on the story. Login isn't required for F21, but logging in will @@ -137,7 +138,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): form = soup.find('form', {'id':'sitemaxratingform'}) params={'ctkn':form.find('input', {'name':'ctkn'})['value'], 'sitemaxrating':'5'} - logging.info("Attempting to get rating cookie for %s" % url) + logger.info("Attempting to get rating cookie for %s" % url) data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params) # refetch story page. data = self._fetchUrl(url) @@ -158,7 +159,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): try: # going to pull part of the meta data from author list page. infourl = 'http://'+self.host+ainfo['href'] - logging.debug("**StoryInfo** URL: "+infourl) + logger.debug("**StoryInfo** URL: "+infourl) infodata = self._fetchUrl(infourl) infosoup = bs.BeautifulSoup(infodata) @@ -175,14 +176,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): try: # going to pull part of the meta data from *primary* author list page. - logging.debug("**AUTHOR** URL: "+authorurl) + logger.debug("**AUTHOR** URL: "+authorurl) authordata = self._fetchUrl(authorurl) descurl=authorurl authorsoup = bs.BeautifulSoup(authordata) # author can have several pages, scan until we find it. 
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ): nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href'] - logging.debug("**AUTHOR** nextpage URL: "+nextpage) + logger.debug("**AUTHOR** nextpage URL: "+nextpage) authordata = self._fetchUrl(nextpage) descurl=nextpage authorsoup = bs.BeautifulSoup(authordata) @@ -259,7 +260,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url)) div = soup.find('div', {'id' : 'storyinnerbody'}) diff --git a/fanficdownloader/adapters/adapter_twilightarchivescom.py b/fanficdownloader/adapters/adapter_twilightarchivescom.py index f9003260..3f2eef7a 100644 --- a/fanficdownloader/adapters/adapter_twilightarchivescom.py +++ b/fanficdownloader/adapters/adapter_twilightarchivescom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. http://www.twilightarchives.com/read/9353 self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId')) @@ -77,7 +78,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -172,7 +173,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index 4e1e2dd1..8f46aa33 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -42,7 +43,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -83,13 +84,13 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -99,7 +100,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -225,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) # problems with some stories, but only in calibre. I suspect diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py index 415b7f5d..fd456ccd 100644 --- a/fanficdownloader/adapters/adapter_twiwritenet.py +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib import urllib2 @@ -43,7 +44,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -84,13 +85,13 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.info("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.info("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -109,7 +110,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): addurl="" url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -252,7 +253,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) data = self._fetchUrl(url) # problems with some stories, but only in calibre. I suspect diff --git a/fanficdownloader/adapters/adapter_walkingtheplankorg.py b/fanficdownloader/adapters/adapter_walkingtheplankorg.py index e3885387..53357da9 100644 --- a/fanficdownloader/adapters/adapter_walkingtheplankorg.py +++ b/fanficdownloader/adapters/adapter_walkingtheplankorg.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -45,7 +46,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. 
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -84,7 +85,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -215,7 +216,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py index cc22267e..6f396f7b 100644 --- a/fanficdownloader/adapters/adapter_whoficcom.py +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -56,7 +57,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): # - get chapter list, if not one-shot. url = self.url+'&chapter=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) # use BeautifulSoup HTML parser to make everything easier to find. try: @@ -69,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): # pull title(title) and author from the HTML title. 
title = soup.find('title').string - logging.debug('Title: %s' % title) + logger.debug('Title: %s' % title) title = title.split('::')[1].strip() self.story.setMetadata('title',title.split(' by ')[0].strip()) self.story.setMetadata('author',title.split(' by ')[1].strip()) @@ -109,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): # Published: 2010.08.15 - Updated: 2010.08.16 - Chapters: 4 - Completed: Yes - Word Count: 4890 # - logging.debug("Author URL: "+self.story.getMetadata('authorUrl')) + logger.debug("Author URL: "+self.story.getMetadata('authorUrl')) soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')), selfClosingTags=('br')) # normalize
tags to
# find this story in the list, parse it's metadata based on @@ -212,7 +213,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_wizardtalesnet.py b/fanficdownloader/adapters/adapter_wizardtalesnet.py index 419dc548..b00df831 100644 --- a/fanficdownloader/adapters/adapter_wizardtalesnet.py +++ b/fanficdownloader/adapters/adapter_wizardtalesnet.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,13 +93,13 @@ class WizardTalesNetAdapter(BaseSiteAdapter): params['submit'] = 'Submit' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' - logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) d = self._fetchUrl(loginUrl, params) if "Member Account" not in d : #Member Account - logging.info("Failed to login to URL %s as %s" % (loginUrl, + logger.info("Failed to login to URL %s as %s" % (loginUrl, params['penname'])) raise exceptions.FailedToLogin(url,params['penname']) return False @@ -120,7 +121,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. 
Some # sites skip that for one-chapter stories. url = self.url+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -145,7 +146,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter): # correct stupid & error in url. addurl = addurl.replace("&","&") url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -286,7 +287,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_wolverineandroguecom.py b/fanficdownloader/adapters/adapter_wolverineandroguecom.py index 37b09a73..24a1bd03 100644 --- a/fanficdownloader/adapters/adapter_wolverineandroguecom.py +++ b/fanficdownloader/adapters/adapter_wolverineandroguecom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -77,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. 
url = self.url+'&index=1' - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -203,7 +204,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/adapter_wraithbaitcom.py b/fanficdownloader/adapters/adapter_wraithbaitcom.py index 9f077a8a..f827d7e0 100644 --- a/fanficdownloader/adapters/adapter_wraithbaitcom.py +++ b/fanficdownloader/adapters/adapter_wraithbaitcom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -48,7 +49,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -86,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -209,7 +210,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. 
def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url)) diff --git a/fanficdownloader/adapters/adapter_yourfanfictioncom.py b/fanficdownloader/adapters/adapter_yourfanfictioncom.py index 44864e16..f11e7a3f 100644 --- a/fanficdownloader/adapters/adapter_yourfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_yourfanfictioncom.py @@ -17,6 +17,7 @@ import time import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -54,7 +55,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -92,7 +93,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter): # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -126,7 +127,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter): # explicitly put ageconsent because google appengine regexp doesn't include it for some reason. 
addurl = addurl.replace("&","&")+'&ageconsent=ok' url = self.url+'&index=1'+addurl - logging.debug("URL 2nd try: "+url) + logger.debug("URL 2nd try: "+url) try: data = self._fetchUrl(url) @@ -147,7 +148,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter): # while len(loopdata) > 0: # if len(loopdata) < 5000: # chklen = len(loopdata) - # logging.info("loopdata: %s" % loopdata[:chklen]) + # logger.info("loopdata: %s" % loopdata[:chklen]) # loopdata = loopdata[chklen:] # use BeautifulSoup HTML parser to make everything easier to find. @@ -270,7 +271,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 7ca9686f..3cfc7ef4 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -27,6 +27,8 @@ from functools import partial from .. 
import BeautifulSoup as bs from ..htmlcleanup import stripHTML +logger = logging.getLogger(__name__) + try: from google.appengine.api import apiproxy_stub_map def urlfetch_timeout_hook(service, call, request, response): @@ -38,10 +40,10 @@ try: apiproxy_stub_map.apiproxy.GetPreCallHooks().Append( 'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch') - logging.info("Hook to make default deadline 10.0 installed.") + logger.info("Hook to make default deadline 10.0 installed.") except: pass - #logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine") + #logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine") from ..story import Story from ..gziphttp import GZipProcessor @@ -125,7 +127,7 @@ class BaseSiteAdapter(Configurable): #print code if code == "auto": if not chardet: - logging.info("chardet not available, skipping 'auto' encoding") + logger.info("chardet not available, skipping 'auto' encoding") continue detected = chardet.detect(data) #print detected @@ -133,12 +135,11 @@ class BaseSiteAdapter(Configurable): code=detected['encoding'] else: continue - logging.debug("try code:"+code) return data.decode(code) except: - logging.debug("code failed:"+code) + logger.debug("code failed:"+code) pass - logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode) + logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode) return "".join([x for x in data if ord(x) < 128]) # Assumes application/x-www-form-urlencoded. 
parameters, headers are dict()s @@ -175,10 +176,10 @@ class BaseSiteAdapter(Configurable): return self._decode(self._fetchUrlRaw(url,parameters)) except Exception, e: excpt=e - logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e))) + logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e))) - logging.error("Giving up on %s" %url) - logging.exception(excpt) + logger.error("Giving up on %s" %url) + logger.exception(excpt) raise(excpt) # Limit chapters to download. Input starts at 1, list starts at 0 @@ -304,7 +305,7 @@ class BaseSiteAdapter(Configurable): if not fetch: fetch=self._fetchUrlRaw - acceptable_attributes = ['href','name'] + acceptable_attributes = ['href','name','class','id'] #print("include_images:"+self.getConfig('include_images')) if self.getConfig('include_images'): acceptable_attributes.extend(('src','alt','longdesc')) @@ -356,7 +357,6 @@ class BaseSiteAdapter(Configurable): def cachedfetch(realfetch,cache,url): if url in cache: - print("cache hit") return cache[url] else: return realfetch(url) diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py index a909ec68..24296c4d 100644 --- a/fanficdownloader/configurable.py +++ b/fanficdownloader/configurable.py @@ -134,7 +134,7 @@ class Configurable(object): return self.configuration.hasConfig(key) def getConfig(self, key, default=""): - return self.configuration.getConfig(key) + return self.configuration.getConfig(key,default) def getConfigList(self, key): return self.configuration.getConfigList(key) diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 733d8358..f176d509 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -20,6 +20,8 @@ import urlparse import string from math import floor from functools import partial +import logging +import urlparse as up import exceptions from htmlcleanup import conditionalRemoveEntities, removeAllEntities @@ -52,7 +54,7 @@ try: if 
export: return (img.export('JPG'),'jpg','image/jpeg') else: - print("image used unchanged") + logging.debug("image used unchanged") return (data,'jpg','image/jpeg') except: @@ -88,23 +90,34 @@ except: img.save(outsio,'JPEG') return (outsio.getvalue(),'jpg','image/jpeg') else: - print("image used unchanged") + logging.debug("image used unchanged") return (data,'jpg','image/jpeg') except: - # No calibre or PIL, simple pass through with mimetype. - imagetypes = { - 'jpg':'image/jpeg', - 'jpeg':'image/jpeg', - 'png':'image/png', - 'gif':'image/gif', - 'svg':'image/svg+xml', - } - def convert_image(url,data,sizes,grayscale): - ext=url[url.rfind('.')+1:].lower() - return (data,ext,imagetypes[ext]) + return no_convert_image(url,data) + +imagetypes = { + 'jpg':'image/jpeg', + 'jpeg':'image/jpeg', + 'png':'image/png', + 'gif':'image/gif', + 'svg':'image/svg+xml', + } + +## also used for explicit no image processing. +def no_convert_image(url,data): + parsedUrl = up.urlparse(url) + + ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower() + + if ext not in imagetypes: + logging.debug("no_convert_image url:%s - no known extension"%url) + # doesn't have extension? use jpg. + ext='jpg' + + return (data,ext,imagetypes[ext]) def normalize_format_name(fmt): if fmt: @@ -240,24 +253,35 @@ class Story(Configurable): ## Three part effect only those key(s) lists. ## pattern=>replacement ## metakey,metakey=>pattern=>replacement + ## *Five* part lines. 
Effect only when trailing conditional key=>regexp matches + ## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp] def setReplace(self,replace): for line in replace.splitlines(): + if "&&" in line: + (line,conditional) = map( lambda x: x.strip(), line.split("&&") ) + condparts = map( lambda x: x.strip(), conditional.split("=>") ) + else: + condparts=[None,None] if "=>" in line: parts = map( lambda x: x.strip(), line.split("=>") ) if len(parts) > 2: parts[0] = map( lambda x: x.strip(), parts[0].split(",") ) - self.replacements.append(parts) + self.replacements.append(parts+condparts) else: - self.replacements.append([None]+parts) + self.replacements.append([None]+parts+condparts) def doReplacments(self,value,key): - for (keys,p,v) in self.replacements: + for (keys,regexp,replacement,condkey,condregexp) in self.replacements: if (keys == None or key in keys) \ and isinstance(value,basestring) \ - and re.search(p,value): - #pv=value - value = re.sub(p,v,value) - #print("change:%s => %s === %s => %s "%(p,v,pv,value)) + and re.search(regexp,value): + doreplace=True + if condkey: + condval = self.getMetadata(condkey) + doreplace = condval != None and re.search(condregexp,condval) + + if doreplace: + value = re.sub(regexp,replacement,value) return value def getMetadataRaw(self,key): @@ -280,7 +304,9 @@ class Story(Configurable): value = commaGroups(value) if key == "numChapters": value = commaGroups("%d"%value) - if key in ("dateCreated","datePublished","dateUpdated"): + if key in ("dateCreated"): + value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S")) + if key in ("datePublished","dateUpdated"): value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d")) if doreplacements: @@ -411,11 +437,14 @@ class Story(Configurable): title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title) self.chapters.append( (title,html) ) - def getChapters(self): + def getChapters(self,fortoc=False): "Chapters will be tuples of (title,html)" retval = 
[] - if self.getConfig('add_chapter_numbers') and \ - self.getConfig('chapter_title_add_pattern'): + ## only add numbers if more than one chapter. + if len(self.chapters) > 1 and \ + (self.getConfig('add_chapter_numbers') == "true" \ + or (self.getConfig('add_chapter_numbers') == "toconly" and fortoc)) \ + and self.getConfig('chapter_title_add_pattern'): for index, (title,html) in enumerate(self.chapters): retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) ) else: @@ -480,17 +509,22 @@ class Story(Configurable): prefix='ffdl' if imgurl not in self.imgurls: parsedUrl = urlparse.urlparse(imgurl) + try: - sizes = [ int(x) for x in self.getConfigList('image_max_size') ] + if self.getConfig('no_image_processing'): + (data,ext,mime) = no_convert_image(imgurl, + fetch(imgurl)) + else: + try: + sizes = [ int(x) for x in self.getConfigList('image_max_size') ] + except Exception, e: + raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e)) + (data,ext,mime) = convert_image(imgurl, + fetch(imgurl), + sizes, + self.getConfig('grayscale_images')) except Exception, e: - raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e)) - try: - (data,ext,mime) = convert_image(imgurl, - fetch(imgurl), - sizes, - self.getConfig('grayscale_images')) - except Exception, e: - print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e)) + logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e)) return "failedtoload" # explicit cover, make the first image. 
@@ -525,7 +559,7 @@ class Story(Configurable): ext) self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data}) - print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data))) + logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data))) else: newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc'] diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 98328713..a2d561a0 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -18,6 +18,7 @@ import re import os.path import datetime +import string import StringIO import zipfile from zipfile import ZipFile, ZIP_DEFLATED @@ -26,6 +27,8 @@ import logging from ..configurable import Configurable from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML +logger = logging.getLogger(__name__) + class BaseStoryWriter(Configurable): @staticmethod @@ -101,6 +104,22 @@ class BaseStoryWriter(Configurable): names as Story.metadata, but ENTRY should use label and value. 
""" if self.getConfig("include_titlepage"): + + if self.hasConfig("titlepage_start"): + START = string.Template(self.getConfig("titlepage_start")) + + if self.hasConfig("titlepage_entry"): + ENTRY = string.Template(self.getConfig("titlepage_entry")) + + if self.hasConfig("titlepage_end"): + END = string.Template(self.getConfig("titlepage_end")) + + if self.hasConfig("titlepage_wide_entry"): + WIDE_ENTRY = string.Template(self.getConfig("titlepage_wide_entry")) + + if self.hasConfig("titlepage_no_title_entry"): + NO_TITLE_ENTRY = string.Template(self.getConfig("titlepage_no_title_entry")) + self._write(out,START.substitute(self.story.getAllMetadata())) if WIDE_ENTRY==None: @@ -120,11 +139,11 @@ class BaseStoryWriter(Configurable): if self.hasConfig(entry+"_label"): label=self.getConfig(entry+"_label") elif entry in self.titleLabels: - logging.debug("Using fallback label for %s_label"%entry) + logger.debug("Using fallback label for %s_label"%entry) label=self.titleLabels[entry] else: label="%s"%entry.title() - logging.debug("No known label for %s, fallback to '%s'"%(entry,label)) + logger.debug("No known label for %s, fallback to '%s'"%(entry,label)) # If the label for the title entry is empty, use the # 'no title' option if there is one. @@ -132,6 +151,7 @@ class BaseStoryWriter(Configurable): TEMPLATE= NO_TITLE_ENTRY self._write(out,TEMPLATE.substitute({'label':label, + 'id':entry, 'value':self.story.getMetadata(entry)})) else: self._write(out, entry) @@ -146,11 +166,22 @@ class BaseStoryWriter(Configurable): """ # Only do TOC if there's more than one chapter and it's configured. 
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + if self.hasConfig("tocpage_start"): + START = string.Template(self.getConfig("tocpage_start")) + + if self.hasConfig("tocpage_entry"): + ENTRY = string.Template(self.getConfig("tocpage_entry")) + + if self.hasConfig("tocpage_end"): + END = string.Template(self.getConfig("tocpage_end")) + self._write(out,START.substitute(self.story.getAllMetadata())) - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)): if html: - self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + self._write(out,ENTRY.substitute({'chapter':title, + 'number':index+1, + 'index':"%04d"%(index+1)})) self._write(out,END.substitute(self.story.getAllMetadata())) @@ -161,6 +192,8 @@ class BaseStoryWriter(Configurable): if outfilename == None: outfilename=self.getOutputFileName() + self.outfilename = outfilename + # minor cheat, tucking css into metadata. if self.getConfig("output_css"): self.story.setMetadata("output_css", @@ -171,11 +204,11 @@ class BaseStoryWriter(Configurable): if not outstream: close=True - logging.info("Save directly to file: %s" % outfilename) + logger.info("Save directly to file: %s" % outfilename) if self.getConfig('make_directories'): path="" - dirs = os.path.dirname(outfilename).split('/') - for dir in dirs: + outputdirs = os.path.dirname(outfilename).split('/') + for dir in outputdirs: path+=dir+"/" if not os.path.exists(path): os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? @@ -198,7 +231,7 @@ class BaseStoryWriter(Configurable): outstream = open(outfilename,"wb") else: close=False - logging.debug("Save to stream") + logger.debug("Save to stream") if not metaonly: self.story = self.adapter.getStory() # get full story now, @@ -209,14 +242,14 @@ class BaseStoryWriter(Configurable): # fetch once. 
if self.getConfig('zip_output'): out = StringIO.StringIO() + self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) self.writeStoryImpl(out) - zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) - zipout.writestr(self.getBaseFileName(),out.getvalue()) + self.zipout.writestr(self.getBaseFileName(),out.getvalue()) # declares all the files created by Windows. otherwise, when # it runs in appengine, windows unzips the files as 000 perms. - for zf in zipout.filelist: + for zf in self.zipout.filelist: zf.create_system = 0 - zipout.close() + self.zipout.close() out.close() else: self.writeStoryImpl(outstream) @@ -224,6 +257,27 @@ class BaseStoryWriter(Configurable): if close: outstream.close() + def writeFile(self, filename, data): + logger.debug("writeFile:%s"%filename) + + if self.getConfig('zip_output'): + outputdirs = os.path.dirname(self.getBaseFileName()) + if outputdirs: + filename=outputdirs+'/'+filename + self.zipout.writestr(filename,data) + else: + outputdirs = os.path.dirname(self.outfilename) + if outputdirs: + filename=outputdirs+'/'+filename + + dir = os.path.dirname(filename) + if not os.path.exists(dir): + os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2? + + outstream = open(filename,"wb") + outstream.write(data) + outstream.close() + def writeStoryImpl(self, out): "Must be overriden by sub classes." pass diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index 4f92643f..c3015e2e 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -29,6 +29,8 @@ from xml.dom.minidom import parse, parseString, getDOMImplementation from base_writer import * from ..htmlcleanup import stripHTML +logger = logging.getLogger(__name__) + class EpubWriter(BaseStoryWriter): @staticmethod @@ -151,8 +153,16 @@ ${value}

Update Log

''') + self.EPUB_LOG_UPDATE_START = string.Template(''' +

+''') + self.EPUB_LOG_ENTRY = string.Template(''' ${label}: ${value} +''') + + self.EPUB_LOG_UPDATE_END = string.Template(''' +


''') self.EPUB_LOG_PAGE_END = string.Template(''' @@ -160,30 +170,50 @@ ${value}
''') + self.EPUB_LOG_PAGE_END = string.Template(''' + + +''') + + self.EPUB_COVER = string.Template(''' +Cover
+cover +
+''') + def writeLogPage(self, out): """ - XXX - - Write the log page, but only include entries that there's - metadata for. START, ENTRY and END are expected to already by + metadata for. START, ENTRY and END are expected to already be string.Template(). START and END are expected to use the same names as Story.metadata, but ENTRY should use id, label and value. """ - if self.getConfig("include_logpage"): + if self.hasConfig("logpage_start"): + START = string.Template(self.getConfig("logpage_start")) + else: + START = self.EPUB_LOG_PAGE_START - # if there's a self.story.logfile, there's an existing log - # to add to. - if self.story.logfile: - print("existing logfile found, appending") - print("existing data:%s"%self._getLastLogData(self.story.logfile)) - replace_string = "" # "" - self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string)) - else: - # otherwise, write a new one. - self._write(out,self.EPUB_LOG_PAGE_START.substitute(self.story.getAllMetadata())) - self._write(out,self._makeLogEntry()) - self._write(out,self.EPUB_LOG_PAGE_END.substitute(self.story.getAllMetadata())) + if self.hasConfig("logpage_end"): + END = string.Template(self.getConfig("logpage_end")) + else: + END = self.EPUB_LOG_PAGE_END + + # if there's a self.story.logfile, there's an existing log + # to add to. + if self.story.logfile: + logger.debug("existing logfile found, appending") + logger.debug("existing data:%s"%self._getLastLogData(self.story.logfile)) + replace_string = "" # "" + self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string)) + else: + # otherwise, write a new one. + self._write(out,START.substitute(self.story.getAllMetadata())) + self._write(out,self._makeLogEntry()) + self._write(out,END.substitute(self.story.getAllMetadata())) # self parsing instead of Soup because it should be simple and not # worth the overhead. 
@@ -206,7 +236,22 @@ ${value}
return values def _makeLogEntry(self, oldvalues={}): - retval = "

" + if self.hasConfig("logpage_update_start"): + START = string.Template(self.getConfig("logpage_update_start")) + else: + START = self.EPUB_LOG_UPDATE_START + + if self.hasConfig("logpage_entry"): + ENTRY = string.Template(self.getConfig("logpage_entry")) + else: + ENTRY = self.EPUB_LOG_ENTRY + + if self.hasConfig("logpage_update_end"): + END = string.Template(self.getConfig("logpage_update_end")) + else: + END = self.EPUB_LOG_UPDATE_END + + retval = START.substitute(self.story.getAllMetadata()) for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"): if self.isValidMetaEntry(entry): @@ -215,22 +260,22 @@ ${value}
if self.hasConfig(entry+"_label"): label=self.getConfig(entry+"_label") elif entry in self.titleLabels: - logging.debug("Using fallback label for %s_label"%entry) + logger.debug("Using fallback label for %s_label"%entry) label=self.titleLabels[entry] else: label="%s"%entry.title() - logging.debug("No known label for %s, fallback to '%s'"%(entry,label)) + logger.debug("No known label for %s, fallback to '%s'"%(entry,label)) - retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry, - 'label':label, - 'value':val}) + retval = retval + ENTRY.substitute({'id':entry, + 'label':label, + 'value':val}) else: # could be useful for introducing extra text, but # mostly it makes it easy to tell when you get the # keyword wrong. retval = retval + entry - retval = retval + "


" + retval = retval + END.substitute(self.story.getAllMetadata()) if self.getConfig('replace_hr'): retval = retval.replace("
","
* * *
") @@ -368,9 +413,9 @@ ${value}
guide = None coverIO = None - imgid = "image0000" + coverimgid = "image0000" if not self.story.cover and self.story.oldcover: - print("writer_epub: no new cover, has old cover, write image.") + logger.debug("writer_epub: no new cover, has old cover, write image.") (oldcoverhtmlhref, oldcoverhtmltype, oldcoverhtmldata, @@ -380,8 +425,8 @@ ${value}
outputepub.writestr(oldcoverhtmlhref,oldcoverhtmldata) outputepub.writestr(oldcoverimghref,oldcoverimgdata) - imgid = "image0" - items.append((imgid, + coverimgid = "image0" + items.append((coverimgid, oldcoverimghref, oldcoverimgtype, None)) @@ -406,6 +451,10 @@ ${value}
imgmap['mime'], None)) imgcount+=1 + if 'cover' in imgfile: + # make sure coverimgid is set to the cover, not + # just the first image. + coverimgid = items[-1][0] items.append(("style","OEBPS/stylesheet.css","text/css",None)) @@ -417,7 +466,7 @@ ${value}
itemrefs.append("cover") # # - metadata.appendChild(newTag(contentdom,"meta",{"content":"image0000", + metadata.appendChild(newTag(contentdom,"meta",{"content":coverimgid, "name":"cover"})) # cover stuff for later: # at end of : @@ -429,16 +478,12 @@ ${value}
"title":"Cover", "href":"OEBPS/cover.xhtml"})) + if self.hasConfig("cover_content"): + COVER = string.Template(self.getConfig("cover_content")) + else: + COVER = self.EPUB_COVER coverIO = StringIO.StringIO() - coverIO.write(''' -Cover
-cover -
-'''%self.story.cover) + coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items()))) if self.getConfig("include_titlepage"): items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) @@ -447,11 +492,15 @@ div { margin: 0pt; padding: 0pt; } items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) itemrefs.append("toc_page") - if self.getConfig("include_logpage"): + dologpage = ( self.getConfig("include_logpage") == "smart" and \ + (self.story.logfile or self.story.getMetadataRaw("status") == "In-Progress") ) \ + or self.getConfig("include_logpage") == "true" + + if dologpage: items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log")) itemrefs.append("log_page") - for index, (title,html) in enumerate(self.story.getChapters()): + for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)): if html: i=index+1 items.append(("file%04d"%i, @@ -483,8 +532,8 @@ div { margin: 0pt; padding: 0pt; } contentxml = contentdom.toxml(encoding='utf-8') # tweak for brain damaged Nook STR. Nook insists on name before content. - contentxml = contentxml.replace(''%imgid, - ''%imgid) + contentxml = contentxml.replace(''%coverimgid, + ''%coverimgid) outputepub.writestr("content.opf",contentxml) contentdom.unlink() @@ -582,17 +631,28 @@ div { margin: 0pt; padding: 0pt; } outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue()) tocpageIO.close() - # write log page. - logpageIO = StringIO.StringIO() - self.writeLogPage(logpageIO) - if logpageIO.getvalue(): # will be false if no log page. + if dologpage: + # write log page. 
+ logpageIO = StringIO.StringIO() + self.writeLogPage(logpageIO) outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue()) - logpageIO.close() + logpageIO.close() + + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.EPUB_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.EPUB_CHAPTER_END for index, (title,html) in enumerate(self.story.getChapters()): if html: - logging.debug('Writing chapter text for: %s' % title) - fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + logger.debug('Writing chapter text for: %s' % title) + vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals) # ffnet(& maybe others) gives the whole chapter text # as one line. This causes problems for nook(at # least) when the chapter size starts getting big diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 85ba1fa5..0224dfcc 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -46,6 +46,10 @@ ${output_css}

${title} by ${authorHTML}

''') + self.HTML_COVER = string.Template(''' +cover +''') + self.HTML_TITLE_PAGE_START = string.Template(''' ''') @@ -75,6 +79,8 @@ ${output_css}

${chapter}

''') + self.HTML_CHAPTER_END = string.Template('') + self.HTML_FILE_END = string.Template(''' ''') @@ -82,8 +88,26 @@ ${output_css} def writeStoryImpl(self, out): - self._write(out,self.HTML_FILE_START.substitute(self.story.getAllMetadata())) + if self.hasConfig("cover_content"): + COVER = string.Template(self.getConfig("cover_content")) + else: + COVER = self.HTML_COVER + if self.hasConfig('file_start'): + FILE_START = string.Template(self.getConfig("file_start")) + else: + FILE_START = self.HTML_FILE_START + + if self.hasConfig('file_end'): + FILE_END = string.Template(self.getConfig("file_end")) + else: + FILE_END = self.HTML_FILE_END + + self._write(out,FILE_START.substitute(self.story.getAllMetadata())) + + if self.getConfig('include_images') and self.story.cover: + self._write(out,COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items()))) + self.writeTitlePage(out, self.HTML_TITLE_PAGE_START, self.HTML_TITLE_ENTRY, @@ -94,10 +118,27 @@ ${output_css} self.HTML_TOC_ENTRY, self.HTML_TOC_PAGE_END) + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.HTML_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.HTML_CHAPTER_END + for index, (title,html) in enumerate(self.story.getChapters()): if html: logging.debug('Writing chapter text for: %s' % title) - self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + self._write(out,CHAPTER_START.substitute(vals)) self._write(out,html) + self._write(out,CHAPTER_END.substitute(vals)) - self._write(out,self.HTML_FILE_END.substitute(self.story.getAllMetadata())) + self._write(out,FILE_END.substitute(self.story.getAllMetadata())) + + if self.getConfig('include_images'): + for imgmap in 
self.story.getImgUrls(): + self.writeFile(imgmap['newsrc'],imgmap['data']) + diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py index 8a443b88..6b5d8e65 100644 --- a/fanficdownloader/writers/writer_mobi.py +++ b/fanficdownloader/writers/writer_mobi.py @@ -88,27 +88,6 @@ ${value}
self.MOBI_TABLE_TITLE_PAGE_END = string.Template('''
- - -''') - - self.MOBI_TOC_PAGE_START = string.Template(''' - - - -${title} by ${author} - - -
-

Table of Contents

-''') - - self.MOBI_TOC_ENTRY = string.Template(''' -${chapter}
-''') - - self.MOBI_TOC_PAGE_END = string.Template(''' -
''') @@ -169,10 +148,21 @@ ${value}
# files.append(tocpageIO.getvalue()) # tocpageIO.close() + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.MOBI_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.MOBI_CHAPTER_END + for index, (title,html) in enumerate(self.story.getChapters()): if html: logging.debug('Writing chapter text for: %s' % title) - fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals) # ffnet(& maybe others) gives the whole chapter text # as one line. This causes problems for nook(at # least) when the chapter size starts getting big diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py index 6b9f35b0..388d4d47 100644 --- a/fanficdownloader/writers/writer_txt.py +++ b/fanficdownloader/writers/writer_txt.py @@ -98,6 +98,7 @@ ${chapter} \t${chapter} ''') + self.TEXT_CHAPTER_END = string.Template(u'') self.TEXT_FILE_END = string.Template(u''' @@ -114,7 +115,17 @@ End file. wrapout = KludgeStringIO() - wrapout.write(self.TEXT_FILE_START.substitute(self.story.getAllMetadata())) + if self.hasConfig("file_start"): + FILE_START = string.Template(self.getConfig("file_start")) + else: + FILE_START = self.TEXT_FILE_START + + if self.hasConfig("file_end"): + FILE_END = string.Template(self.getConfig("file_end")) + else: + FILE_END = self.TEXT_FILE_END + + wrapout.write(FILE_START.substitute(self.story.getAllMetadata())) self.writeTitlePage(wrapout, self.TEXT_TITLE_PAGE_START, @@ -133,13 +144,25 @@ End file. 
self._write(out,self.lineends(self.wraplines(towrap))) + if self.hasConfig('chapter_start'): + CHAPTER_START = string.Template(self.getConfig("chapter_start")) + else: + CHAPTER_START = self.TEXT_CHAPTER_START + + if self.hasConfig('chapter_end'): + CHAPTER_END = string.Template(self.getConfig("chapter_end")) + else: + CHAPTER_END = self.TEXT_CHAPTER_END + for index, (title,html) in enumerate(self.story.getChapters()): if html: logging.debug('Writing chapter text for: %s' % title) - self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) + vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1} + self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_START.substitute(vals))))) self._write(out,self.lineends(html2text(html,wrap_width=self.wrap_width))) + self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_END.substitute(vals))))) - self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.getAllMetadata())))) + self._write(out,self.lineends(self.wraplines(FILE_END.substitute(self.story.getAllMetadata())))) def wraplines(self, text): diff --git a/index.html b/index.html index 099473f5..1da7f746 100644 --- a/index.html +++ b/index.html @@ -54,10 +54,6 @@ much easier.

-

New Fixes

-

- New version containing some bug fixes, and a couple metadata features. -

Questions? Check out our FAQs. @@ -66,7 +62,7 @@ If you have any problems with this application, please report them in the FanFictionDownLoader Google Group. The - Previous Version is also available for you to use if necessary. + Previous Version is also available for you to use if necessary.

{{ error_message }} @@ -543,7 +539,14 @@ Use the URL of the story's chapter list, such as
http://thehookupzone.net/CriminalMinds/viewstory.php?sid=1234 +
www.qaf-fic.com
+
+ Use the URL of the story's chapter list, such as +
http://www.qaf-fic.com/atp/viewstory.php?sid=1234 +
+ +

A few additional things to know, which will make your life substantially easier:

diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 3a7f6af8..c34bf8a1 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -131,8 +131,10 @@ extratags: FanFiction ## for regexp details. ## Make sure to keep at least one space at the start of each line and ## to escape % to %%, if used. -## Two or three part lines. Two part effect everything. +## Two, three or five part lines. Two part effect everything. ## Three part effect only those key(s) lists. +## *Five* part lines. Effect only when trailing conditional key=>regexp matches +## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp] #replace_metadata: # genre,category=>Sci-Fi=>SF # Puella Magi Madoka Magica.* => Madoka @@ -140,7 +142,9 @@ extratags: FanFiction # Crossover: (.*)=>\1 # title=>(.*)Great(.*)=>\1Moderate\2 # .*-Centered=> - +# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers +# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural + ## Some readers don't show horizontal rule (
) tags correctly. ## This replaces them all with a centered '* * *'. (Note centering ## doesn't work on some devices either.) @@ -168,10 +172,15 @@ keep_summary_html:true ## Don't like the numbers at the start of chapter titles on some ## sites? You can use strip_chapter_numbers to strip them off. Just ## want to make them all look the same? Strip them off, then add them -## back on with add_chapter_numbers. Don't like the way it strips -## numbers or adds them back? See chapter_title_strip_pattern and -## chapter_title_add_pattern. +## back on with add_chapter_numbers:true. Only want them added back +## on for Table of Contents(toc)? Use add_chapter_numbers:toconly. +## (toconly doesn't work on mobi output.) Don't like the way it +## strips numbers or adds them back? See chapter_title_strip_pattern +## and chapter_title_add_pattern. strip_chapter_numbers:false + +## add_chapter_numbers can be true, false or toconly +## (Note number is not added when there's only one chapter.) add_chapter_numbers:false ## (Two versions of chapter_title_strip_pattern are shown below. You @@ -202,6 +211,17 @@ chapter_title_add_pattern:${index}. ${title} ## Each output format has a section that overrides [defaults] [html] +## include images from img tags in the body and summary of +## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output formats. +## include_images is *not* available in the web service in any format. +#include_images:false + +## This switch prevents FFDL from doing any processing on the images. +## Usually they would be converted to jpg, resized and optionally made +## grayscale. +no_image_processing: true + ## output background color--only used by html and epub (and ignored in ## epub by many readers). Included below in output_css--will be ## ignored if not in output_css. @@ -242,13 +262,17 @@ windows_eol: true ## mobi generated from epub by calibre will have a TOC at the end. 
include_tocpage: false -## include a Update Log page before the story text. If included, the -## log will be updated each time the epub is an all the metadata +## include a Update Log page before the story text. If 'true', the +## log will be updated each time the epub is and all the metadata ## fields that have changed since the last update (typically ## dateUpdated,numChapters,numWords at a minimum) will be shown. ## Great for tracking when chapters came out and when the description, ## etc changed. include_logpage: false +## If set to 'smart', logpage will only be included if the story is +## status:In-Progress or already had a logpage. That way you don't +## end up with Completed stories that have just one logpage entry. +#include_logpage: smart ## items to include in the log page Empty metadata entries, or those ## that haven't changed since the last update, will *not* appear, even @@ -301,6 +325,7 @@ output_css: ## include images from img tags in the body and summary of ## stories. Images will be converted to jpg for size if possible. +## include_images is *only* available in epub and html output format. #include_images:false ## If set, the first image found will be made the cover image. If @@ -355,31 +380,31 @@ nook_img_fix:true ## URLs like: http://test1.com?sid=12345 [test1.com] extratags: FanFiction,Testing -extracategories:Fafner -extragenres:Romance,Fluff -extracharacters:Reginald Smythe-Smythe,Mokona,Harry P. -extraships:Smythe-Smythe/Mokona -extrawarnings:Extreme Bogosity +# extracategories:Fafner +# extragenres:Romance,Fluff +# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P. 
+# extraships:Smythe-Smythe/Mokona +# extrawarnings:Extreme Bogosity -extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL -include_in_compositeJ:dateCreated -include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ -include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated, +# include_in_compositeJ:dateCreated +# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ +# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated, -extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL -extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL -extra_subject_tags: metaA,metaB,metaC +# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL +# extra_subject_tags: metaA,metaB,metaC -replace_metadata: - compositeL=>Val=>VALUE - series,extratags=>Test=>Plan - Puella Magi Madoka Magica.* => Madoka - Comedy=>Humor - Crossover: (.*)=>\1 - (.*)Great(.*)=>\1Moderate\2 - .*-Centered=> - characters=>Harry P\.=>Harry Potter +# replace_metadata: +# compositeL=>Val=>VALUE +# series,extratags=>Test=>Plan +# Puella Magi Madoka Magica.* => Madoka +# Comedy=>Humor +# Crossover: (.*)=>\1 +# (.*)Great(.*)=>\1Moderate\2 +# .*-Centered=> +# characters=>Harry P\.=>Harry Potter ## If necessary, you can define [:] sections to @@ -557,9 +582,6 @@ cliches_label:Character Cliches # themes=>#bcolumn,a # timeline=>#ccolumn,n -## adds to include_subject_tags instead of replacing it. 
-#extra_subject_tags: themes,timeline,cliches - [erosnsappho.sycophanthex.com] ## Site dedicated to these categories/characters/ships extracategories:Harry Potter @@ -1015,6 +1037,15 @@ extracategories:Harry Potter ## Site dedicated to these categories/characters/ships extracategories:Prison Break +[www.qaf-fic.com] +## Site dedicated to these categories/characters/ships +extracategories:Queer as Folk + +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + [www.scarvesandcoffee.net] ## Site dedicated to these categories/characters/ships extracategories:Glee