mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
Clean up on adapters
This commit is contained in:
commit
55fac6a7e3
95 changed files with 1216 additions and 575 deletions
2
app.yaml
2
app.yaml
|
|
@ -1,6 +1,6 @@
|
|||
# ffd-retief-hrd fanfictiondownloader
|
||||
application: fanfictiondownloader
|
||||
version: 4-4-27
|
||||
version: 4-4-29
|
||||
runtime: python27
|
||||
api_version: 1
|
||||
threadsafe: true
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
|
|||
description = 'UI plugin to download FanFiction stories from various sites.'
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
author = 'Jim Miller'
|
||||
version = (1, 6, 11)
|
||||
version = (1, 6, 14)
|
||||
minimum_calibre_version = (0, 8, 57)
|
||||
|
||||
#: This field defines the GUI plugin class that contains all the code
|
||||
|
|
|
|||
|
|
@ -496,7 +496,10 @@ make_firstimage_cover:true
|
|||
book['author_sort'] = book['author'] = story.getList("author", removeallentities=True)
|
||||
book['publisher'] = story.getMetadata("site")
|
||||
book['tags'] = story.getSubjectTags(removeallentities=True)
|
||||
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
|
||||
if story.getMetadata("description"):
|
||||
book['comments'] = sanitize_comments_html(story.getMetadata("description"))
|
||||
else:
|
||||
book['comments']=''
|
||||
book['series'] = story.getMetadata("series", removeallentities=True)
|
||||
|
||||
# adapter.opener is the element with a threadlock. But del
|
||||
|
|
@ -538,7 +541,7 @@ make_firstimage_cover:true
|
|||
print("from URL(%s)"%url)
|
||||
|
||||
# try to find by identifier url first.
|
||||
searchstr = 'identifiers:"=url:%s"'%url.replace(":","|")
|
||||
searchstr = 'identifiers:"=url:=%s"'%url.replace(":","|")
|
||||
identicalbooks = db.search_getting_ids(searchstr, None)
|
||||
if len(identicalbooks) < 1:
|
||||
# find dups
|
||||
|
|
@ -634,9 +637,12 @@ make_firstimage_cover:true
|
|||
|
||||
if book['good']: # there shouldn't be any !'good' books at this point.
|
||||
# if still 'good', make a temp file to write the output to.
|
||||
tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'],
|
||||
suffix='.'+options['fileform'],
|
||||
dir=options['tdir'])
|
||||
# For HTML format users, make the filename inside the zip something reasonable.
|
||||
# For crazy long titles/authors, limit it to 200chars.
|
||||
# For weird/OS-unsafe characters, use file safe only.
|
||||
tmp = PersistentTemporaryFile(prefix=story.formatFileName("${title}-${author}-",allowunsafefilename=False)[:100],
|
||||
suffix='.'+options['fileform'],
|
||||
dir=options['tdir'])
|
||||
print("title:"+book['title'])
|
||||
print("outfile:"+tmp.name)
|
||||
book['outfile'] = tmp.name
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ from StringIO import StringIO
|
|||
|
||||
from calibre.utils.ipc.server import Server
|
||||
from calibre.utils.ipc.job import ParallelJob
|
||||
from calibre.utils.logging import Log
|
||||
|
||||
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
|
||||
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
|
||||
|
|
@ -109,9 +108,6 @@ def do_download_for_worker(book,options):
|
|||
when run as a worker job
|
||||
'''
|
||||
try:
|
||||
# import logging
|
||||
# logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
|
||||
|
||||
book['comment'] = 'Download started...'
|
||||
|
||||
configuration = Configuration(adapters.getConfigSectionFor(book['url']),options['fileform'])
|
||||
|
|
@ -123,7 +119,7 @@ def do_download_for_worker(book,options):
|
|||
|
||||
# images only for epub, even if the user mistakenly turned it
|
||||
# on else where.
|
||||
if options['fileform'] != "epub":
|
||||
if options['fileform'] not in ("epub","html"):
|
||||
configuration.set("overrides","include_images","false")
|
||||
|
||||
adapter = adapters.getAdapter(configuration,book['url'])
|
||||
|
|
|
|||
101
defaults.ini
101
defaults.ini
|
|
@ -161,8 +161,10 @@ extratags: FanFiction
|
|||
## for regexp details.
|
||||
## Make sure to keep at least one space at the start of each line and
|
||||
## to escape % to %%, if used.
|
||||
## Two or three part lines. Two part effect everything.
|
||||
## Two, three or five part lines. Two part lines affect everything.
|
||||
## Three part lines affect only the listed key(s).
|
||||
## *Five* part lines take effect only when the trailing conditional key=>regexp matches.
|
||||
## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
|
||||
#replace_metadata:
|
||||
# genre,category=>Sci-Fi=>SF
|
||||
# Puella Magi Madoka Magica.* => Madoka
|
||||
|
|
@ -170,7 +172,9 @@ extratags: FanFiction
|
|||
# Crossover: (.*)=>\1
|
||||
# title=>(.*)Great(.*)=>\1Moderate\2
|
||||
# .*-Centered=>
|
||||
|
||||
# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers
|
||||
# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural
|
||||
|
||||
## Some readers don't show horizontal rule (<hr />) tags correctly.
|
||||
## This replaces them all with a centered '* * *'. (Note centering
|
||||
## doesn't work on some devices either.)
|
||||
|
|
@ -184,10 +188,15 @@ keep_summary_html:true
|
|||
## Don't like the numbers at the start of chapter titles on some
|
||||
## sites? You can use strip_chapter_numbers to strip them off. Just
|
||||
## want to make them all look the same? Strip them off, then add them
|
||||
## back on with add_chapter_numbers. Don't like the way it strips
|
||||
## numbers or adds them back? See chapter_title_strip_pattern and
|
||||
## chapter_title_add_pattern.
|
||||
## back on with add_chapter_numbers:true. Only want them added back
|
||||
## on for Table of Contents(toc)? Use add_chapter_numbers:toconly.
|
||||
## (toconly doesn't work on mobi output.) Don't like the way it
|
||||
## strips numbers or adds them back? See chapter_title_strip_pattern
|
||||
## and chapter_title_add_pattern.
|
||||
strip_chapter_numbers:false
|
||||
|
||||
## add_chapter_numbers can be true, false or toconly
|
||||
## (Note number is not added when there's only one chapter.)
|
||||
add_chapter_numbers:false
|
||||
|
||||
## (Two versions of chapter_title_strip_pattern are shown below. You
|
||||
|
|
@ -218,6 +227,22 @@ chapter_title_add_pattern:${index}. ${title}
|
|||
## Each output format has a section that overrides [defaults]
|
||||
[html]
|
||||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
## include_images is *not* available in the web service in any format.
|
||||
#include_images:false
|
||||
|
||||
## Note that it's *highly* recommended to use zipfile output or story
|
||||
## unique destination directories to avoid overwriting images.
|
||||
#output_filename: books/${author}/${title}/${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
|
||||
#zip_output: false
|
||||
|
||||
## This switch prevents FFDL from doing any processing on the images.
|
||||
## Usually they would be converted to jpg, resized and optionally made
|
||||
## grayscale.
|
||||
no_image_processing: true
|
||||
|
||||
## output background color--only used by html and epub (and ignored in
|
||||
## epub by many readers). Included below in output_css--will be
|
||||
## ignored if not in output_css.
|
||||
|
|
@ -261,13 +286,17 @@ zip_output: false
|
|||
## mobi generated from epub by calibre will have a TOC at the end.
|
||||
include_tocpage: false
|
||||
|
||||
## include a Update Log page before the story text. If included, the
|
||||
## log will be updated each time the epub is an all the metadata
|
||||
## include an Update Log page before the story text. If 'true', the
|
||||
## log will be updated each time the epub is updated, and all the metadata
|
||||
## fields that have changed since the last update (typically
|
||||
## dateUpdated,numChapters,numWords at a minimum) will be shown.
|
||||
## Great for tracking when chapters came out and when the description,
|
||||
## etc changed.
|
||||
include_logpage: false
|
||||
## If set to 'smart', logpage will only be included if the story is
|
||||
## status:In-Progress or already had a logpage. That way you don't
|
||||
## end up with Completed stories that have just one logpage entry.
|
||||
#include_logpage: smart
|
||||
|
||||
## items to include in the log page. Empty metadata entries, or those
|
||||
## that haven't changed since the last update, will *not* appear, even
|
||||
|
|
@ -320,6 +349,8 @@ output_css:
|
|||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
## include_images is *not* available in the web service in any format.
|
||||
#include_images:false
|
||||
|
||||
## If set, the first image found will be made the cover image. If
|
||||
|
|
@ -374,31 +405,31 @@ nook_img_fix:true
|
|||
## URLs like: http://test1.com?sid=12345
|
||||
[test1.com]
|
||||
extratags: FanFiction,Testing
|
||||
extracategories:Fafner
|
||||
extragenres:Romance,Fluff
|
||||
extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
|
||||
extraships:Smythe-Smythe/Mokona
|
||||
extrawarnings:Extreme Bogosity
|
||||
# extracategories:Fafner
|
||||
# extragenres:Romance,Fluff
|
||||
# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
|
||||
# extraships:Smythe-Smythe/Mokona
|
||||
# extrawarnings:Extreme Bogosity
|
||||
|
||||
extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
|
||||
include_in_compositeJ:dateCreated
|
||||
include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
|
||||
include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
|
||||
# include_in_compositeJ:dateCreated
|
||||
# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
|
||||
# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
|
||||
|
||||
extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
extra_subject_tags: metaA,metaB,metaC
|
||||
# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
# extra_subject_tags: metaA,metaB,metaC
|
||||
|
||||
replace_metadata:
|
||||
compositeL=>Val=>VALUE
|
||||
series,extratags=>Test=>Plan
|
||||
Puella Magi Madoka Magica.* => Madoka
|
||||
Comedy=>Humor
|
||||
Crossover: (.*)=>\1
|
||||
(.*)Great(.*)=>\1Moderate\2
|
||||
.*-Centered=>
|
||||
characters=>Harry P\.=>Harry Potter
|
||||
# replace_metadata:
|
||||
# compositeL=>Val=>VALUE
|
||||
# series,extratags=>Test=>Plan
|
||||
# Puella Magi Madoka Magica.* => Madoka
|
||||
# Comedy=>Humor
|
||||
# Crossover: (.*)=>\1
|
||||
# (.*)Great(.*)=>\1Moderate\2
|
||||
# .*-Centered=>
|
||||
# characters=>Harry P\.=>Harry Potter
|
||||
|
||||
|
||||
## If necessary, you can define [<site>:<format>] sections to
|
||||
|
|
@ -556,9 +587,8 @@ cliches_label:Character Cliches
|
|||
## specific entries to titlepage/logpage without having to copy the
|
||||
## entire titlepage_entries line. (But if you want them higher than
|
||||
## the end, you will need to copy titlepage_entries.)
|
||||
#extra_titlepage_entries: themes,hermiones,dracos,timeline,cliches
|
||||
|
||||
## adds to include_subject_tags instead of replacing it.
|
||||
#extra_titlepage_entries: themes,timeline,cliches
|
||||
#extra_logpage_entries: themes,timeline,cliches
|
||||
#extra_subject_tags: themes,timeline,cliches
|
||||
|
||||
[erosnsappho.sycophanthex.com]
|
||||
|
|
@ -1019,6 +1049,15 @@ extracategories:Harry Potter
|
|||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Prison Break
|
||||
|
||||
[www.qaf-fic.com]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Queer as Folk
|
||||
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
#is_adult:true
|
||||
|
||||
[www.scarvesandcoffee.net]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Glee
|
||||
|
|
|
|||
|
|
@ -15,8 +15,6 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import logging
|
||||
|
||||
import sys, os
|
||||
from os.path import normpath, expanduser, isfile, join
|
||||
from StringIO import StringIO
|
||||
|
|
@ -26,6 +24,14 @@ import string
|
|||
import ConfigParser
|
||||
from subprocess import call
|
||||
|
||||
import logging
|
||||
if sys.version_info >= (2, 7):
|
||||
# suppresses default logger. Logging is set up in fanficdownloader/__init__.py so it works in calibre, too.
|
||||
rootlogger = logging.getLogger()
|
||||
loghandler=logging.NullHandler()
|
||||
loghandler.setFormatter(logging.Formatter("(=====)(levelname)s:%(message)s"))
|
||||
rootlogger.addHandler(loghandler)
|
||||
|
||||
from fanficdownloader import adapters,writers,exceptions
|
||||
from fanficdownloader.configurable import Configuration
|
||||
from fanficdownloader.epubutils import get_dcsource_chaptercount, get_update_data
|
||||
|
|
@ -79,11 +85,9 @@ def main():
|
|||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if options.debug:
|
||||
logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s")
|
||||
|
||||
if not options.debug:
|
||||
logger = logging.getLogger("fanficdownloader")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
if len(args) != 1:
|
||||
parser.error("incorrect number of arguments")
|
||||
|
|
@ -120,8 +124,6 @@ def main():
|
|||
logging.debug('reading %s config file(s), if present'%conflist)
|
||||
configuration.read(conflist)
|
||||
|
||||
print("has include_in_tags?%s"%configuration.hasConfig("include_in_tags"))
|
||||
|
||||
try:
|
||||
configuration.add_section("overrides")
|
||||
except ConfigParser.DuplicateSectionError:
|
||||
|
|
@ -138,7 +140,7 @@ def main():
|
|||
|
||||
# images only for epub, even if the user mistakenly turned it
|
||||
# on elsewhere.
|
||||
if options.format != "epub":
|
||||
if options.format not in ("epub","html"):
|
||||
configuration.set("overrides","include_images","false")
|
||||
|
||||
if options.options:
|
||||
|
|
|
|||
|
|
@ -1 +1,16 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
try:
|
||||
# just a way to switch between web service and CLI/PI
|
||||
import google.appengine.api
|
||||
except:
|
||||
import sys
|
||||
if sys.version_info >= (2, 7):
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
loghandler=logging.StreamHandler()
|
||||
loghandler.setFormatter(logging.Formatter("FFDL:%(levelname)s:%(filename)s(%(lineno)d):%(message)s"))
|
||||
logger.addHandler(loghandler)
|
||||
loghandler.setLevel(logging.DEBUG)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ from os.path import dirname, basename, normpath
|
|||
import logging
|
||||
import urlparse as up
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
## must import each adapter here.
|
||||
|
|
@ -103,6 +105,7 @@ import adapter_bloodtiesfancom
|
|||
import adapter_indeathnet
|
||||
import adapter_dwiggiecom
|
||||
import adapter_jlaunlimitedcom
|
||||
import adapter_qafficcom
|
||||
|
||||
|
||||
## This bit of complexity allows adapters to be added by just adding
|
||||
|
|
@ -124,9 +127,9 @@ for x in imports():
|
|||
|
||||
def getAdapter(config,url):
|
||||
|
||||
logging.debug("trying url:"+url)
|
||||
logger.debug("trying url:"+url)
|
||||
(cls,fixedurl) = getClassFor(url)
|
||||
logging.debug("fixedurl:"+fixedurl)
|
||||
logger.debug("fixedurl:"+fixedurl)
|
||||
if cls:
|
||||
adapter = cls(config,fixedurl) # raises InvalidStoryURL
|
||||
return adapter
|
||||
|
|
@ -164,11 +167,11 @@ def getClassFor(url):
|
|||
cls = getClassFromList(domain)
|
||||
if not cls and domain.startswith("www."):
|
||||
domain = domain.replace("www.","")
|
||||
logging.debug("trying site:without www: "+domain)
|
||||
logger.debug("trying site:without www: "+domain)
|
||||
cls = getClassFromList(domain)
|
||||
fixedurl = fixedurl.replace("http://www.","http://")
|
||||
if not cls:
|
||||
logging.debug("trying site:www."+domain)
|
||||
logger.debug("trying site:www."+domain)
|
||||
cls = getClassFromList("www."+domain)
|
||||
fixedurl = fixedurl.replace("http://","http://www.")
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -41,7 +42,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -65,7 +66,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
addurl=""
|
||||
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -204,7 +205,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -30,6 +31,8 @@ def getClass():
|
|||
return ArchiveOfOurOwnOrgAdapter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
|
|
@ -48,13 +51,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
|
|
@ -104,14 +107,14 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
params['authenticity_token'] = data.split('input name="authenticity_token" type="hidden" value="')[1].split('" /></div>')[0]
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user_sessions'
|
||||
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['user_session[login]']))
|
||||
|
||||
d = self._postUrl(loginUrl, params)
|
||||
#logging.info(d)
|
||||
#logger.info(d)
|
||||
|
||||
if "Successfully logged in" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['user_session[login]']))
|
||||
raise exceptions.FailedToLogin(url,params['user_session[login]'])
|
||||
return False
|
||||
|
|
@ -128,8 +131,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
metaurl = self.url+addurl
|
||||
url = self.url+'/navigate'+addurl
|
||||
logging.info("url: "+url)
|
||||
logging.info("metaurl: "+metaurl)
|
||||
logger.info("url: "+url)
|
||||
logger.info("metaurl: "+metaurl)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -164,7 +167,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
alist = soup.findAll('a', href=re.compile(r"^/users/\w+/pseuds/\w+"))
|
||||
if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
|
||||
self.story.setMetadata('author','Anonymous')
|
||||
self.story.setMetadata('authorUrl',self.url)
|
||||
self.story.setMetadata('authorUrl','http://archiveofourown.org/')
|
||||
self.story.setMetadata('authorId','0')
|
||||
else:
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('/')[2])
|
||||
|
|
@ -174,7 +178,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
# Find the chapters:
|
||||
chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$"))
|
||||
self.story.setMetadata('numChapters',len(chapters))
|
||||
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
for x in range(0,len(chapters)):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
chapter=chapters[x]
|
||||
|
|
@ -291,7 +295,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
chapter=bs.BeautifulSoup('<div class="story"></div>')
|
||||
data = self._fetchUrl(url)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -78,7 +79,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -151,7 +152,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
rating.find('br').replaceWith('split')
|
||||
rating=rating.text.replace("This story is rated",'').split('split')[0]
|
||||
self.story.setMetadata('rating',rating)
|
||||
logging.debug(self.story.getMetadata('rating'))
|
||||
logger.debug(self.story.getMetadata('rating'))
|
||||
|
||||
warnings=box.find('ol')
|
||||
if warnings != None:
|
||||
|
|
@ -177,7 +178,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -94,13 +95,13 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Logout" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -112,7 +113,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -237,7 +238,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -69,7 +70,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
|
|
@ -115,13 +116,13 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -148,7 +149,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -182,7 +183,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -320,7 +321,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -71,7 +72,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
|
|
@ -117,13 +118,13 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/fanfic/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -145,7 +146,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -293,7 +294,7 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -86,7 +87,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -221,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
self._setURL('http://' + self.getSiteDomain() + '/story.php?story='+self.story.getMetadata('storyId'))
|
||||
|
||||
|
|
@ -95,7 +96,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
|
|||
e = self._fetchUrl(url)
|
||||
|
||||
if "Welcome back," not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['name']))
|
||||
raise exceptions.FailedToLogin(url,params['name'])
|
||||
return False
|
||||
|
|
@ -112,7 +113,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -222,7 +223,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -98,13 +99,13 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/elysian/user.php'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "User Account Page" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
|
||||
logger.info("Failed to login to URL %s as %s, or have no authorization to access the story" % (loginUrl, params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
|
|
@ -115,7 +116,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -261,7 +262,7 @@ class DarkSolaceOrgAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -86,7 +87,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -106,7 +107,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -227,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# www.dokuga.com has two 'sections', shown in URL as
|
||||
# 'fanfiction' and 'spark' that change how things should be
|
||||
|
|
@ -87,7 +88,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -224,7 +225,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,13 +93,13 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -120,7 +121,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -145,7 +146,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -282,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,13 +93,13 @@ class DramioneOrgAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -120,7 +121,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -283,7 +284,7 @@ class DramioneOrgAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -86,7 +87,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -106,7 +107,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -239,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
from urllib import unquote_plus
|
||||
|
|
@ -75,12 +76,12 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
# metadata and chapter list
|
||||
|
||||
url = self.origurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
#print("\n===================\n%s\n===================\n"%data)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = bs.BeautifulSoup(data)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
|
|
@ -108,11 +109,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
tryurl = "http://%s/s/%s/%d/"%(self.getSiteDomain(),
|
||||
self.story.getMetadata('storyId'),
|
||||
chapcount+1)
|
||||
print('=Trying newer chapter: %s' % tryurl)
|
||||
logger.debug('=Trying newer chapter: %s' % tryurl)
|
||||
newdata = self._fetchUrl(tryurl)
|
||||
if "not found. Please check to see you are not using an outdated url." \
|
||||
not in newdata:
|
||||
print('=======Found newer chapter: %s' % tryurl)
|
||||
logger.debug('=======Found newer chapter: %s' % tryurl)
|
||||
soup = bs.BeautifulSoup(newdata)
|
||||
except:
|
||||
pass
|
||||
|
|
@ -160,7 +161,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
metatext = stripHTML(gui_table1i.find('div', {'style':'color:gray;'})).replace('Hurt/Comfort','Hurt-Comfort')
|
||||
metalist = metatext.split(" - ")
|
||||
#print("metatext:(%s)"%metalist)
|
||||
#logger.debug("metatext:(%s)"%metalist)
|
||||
|
||||
# Rated: Fiction K - English - Words: 158,078 - Published: 02-04-11
|
||||
|
||||
|
|
@ -176,9 +177,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
genrelist = metalist[0].split('/') # Hurt/Comfort already changed above.
|
||||
goodgenres=True
|
||||
for g in genrelist:
|
||||
print("g:(%s)"%g)
|
||||
#logger.debug("g:(%s)"%g)
|
||||
if g.strip() not in ffnetgenres:
|
||||
print("g not in ffnetgenres")
|
||||
logger.info("g not in ffnetgenres")
|
||||
goodgenres=False
|
||||
if goodgenres:
|
||||
self.story.extendList('genre',genrelist)
|
||||
|
|
@ -240,7 +241,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
return
|
||||
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
time.sleep(0.5) ## ffnet(and, I assume, fpcom) tends to fail
|
||||
## more if hit too fast. This is in
|
||||
## additional to what ever the
|
||||
|
|
@ -265,7 +266,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
div = soup.find('div', {'id' : 'storytextp'})
|
||||
|
||||
if None == div:
|
||||
logging.debug('div id=storytextp not found. data:%s'%data)
|
||||
logger.debug('div id=storytextp not found. data:%s'%data)
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return self.utf8FromSoup(url,div)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -48,7 +49,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/s/'+self.story.getMetadata('storyId') + '/1')
|
||||
|
|
@ -94,12 +95,12 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Login...'
|
||||
|
||||
loginUrl = 'https://ssl.fanfiktion.de/'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['nickname']))
|
||||
d = self._postUrl(loginUrl,params)
|
||||
|
||||
if "Login erfolgreich" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['nickname']))
|
||||
raise exceptions.FailedToLogin(url,params['nickname'])
|
||||
return False
|
||||
|
|
@ -110,7 +111,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -183,7 +184,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
time.sleep(0.5) ## ffde has "floodlock" protection
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
import time
|
||||
import datetime
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
from .. import translit
|
||||
|
|
@ -33,6 +34,8 @@ def getClass():
|
|||
return FicBookNetAdapter
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FicBookNetAdapter(BaseSiteAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
|
|
@ -49,7 +52,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId'))
|
||||
|
|
@ -75,7 +78,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url=self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
except urllib2.HTTPError, e:
|
||||
|
|
@ -95,14 +98,14 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
## Title
|
||||
a = soup.find('h1')
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
logging.debug("Title: (%s)"%self.story.getMetadata('title'))
|
||||
logger.debug("Title: (%s)"%self.story.getMetadata('title'))
|
||||
|
||||
# Find authorid and URL from... author url.
|
||||
a = table.find('a')
|
||||
self.story.setMetadata('authorId',a.text) # Author's name is unique
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.text)
|
||||
logging.debug("Author: (%s)"%self.story.getMetadata('author'))
|
||||
logger.debug("Author: (%s)"%self.story.getMetadata('author'))
|
||||
|
||||
# Find the chapters:
|
||||
chapters = soup.find('div', {'class' : 'part_list'})
|
||||
|
|
@ -123,7 +126,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span')))
|
||||
update=pubdate
|
||||
|
||||
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
|
||||
|
||||
if not ',' in pubdate:
|
||||
pubdate=datetime.date.today().strftime(self.dateformat)
|
||||
|
|
@ -207,7 +210,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -44,7 +45,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
if m:
|
||||
self.story.setMetadata('authorId',m.group('auth'))
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
# normalized story URL.
|
||||
self._setURL(url)
|
||||
else:
|
||||
|
|
@ -68,7 +69,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
if self.is_adult or self.getConfig("is_adult"):
|
||||
params={'iamold':'Yes',
|
||||
'action':'ageanswer'}
|
||||
logging.info("Attempting to get cookie for %s" % url)
|
||||
logger.info("Attempting to get cookie for %s" % url)
|
||||
## posting on list doesn't work, but doesn't hurt, either.
|
||||
data = self._postUrl(url,params)
|
||||
else:
|
||||
|
|
@ -79,7 +80,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## could be either chapter list page or one-shot text page.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._postFetchWithIAmOld(url)
|
||||
|
|
@ -107,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
|
||||
self._setURL(storya['href'])
|
||||
url=self.url
|
||||
logging.debug("Normalizing to URL: "+url)
|
||||
logger.debug("Normalizing to URL: "+url)
|
||||
## title's right there...
|
||||
self.story.setMetadata('title',storya.string)
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -193,7 +194,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# find <!-- headerend --> & <!-- footerstart --> and
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
|
|
@ -61,12 +62,12 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
params['password'] = self.getConfig("password")
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['username']))
|
||||
d = self._postUrl(loginUrl,params)
|
||||
|
||||
if "Login attempt failed..." in d:
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['username']))
|
||||
raise exceptions.FailedToLogin(url,params['username'])
|
||||
return False
|
||||
|
|
@ -79,7 +80,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
# metadata and chapter list
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
|
|
@ -96,7 +97,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
# normalize story URL on chapter list.
|
||||
self.story.setMetadata('storyId',storya['href'].split('/',)[2])
|
||||
url = "http://"+self.getSiteDomain()+storya['href']
|
||||
logging.debug("Normalizing to URL: "+url)
|
||||
logger.debug("Normalizing to URL: "+url)
|
||||
self._setURL(url)
|
||||
try:
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url))
|
||||
|
|
@ -201,7 +202,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import cookielib as cl
|
||||
|
|
@ -175,7 +176,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})
|
||||
if soup == None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -49,7 +50,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0])
|
||||
if 'storyInfo' in self.story.getMetadata('storyId'):
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/s/storyInfo.php?id='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -95,13 +96,13 @@ class FineStoriesComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Login'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['theusername']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "My Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['theusername']))
|
||||
raise exceptions.FailedToLogin(url,params['theusername'])
|
||||
return False
|
||||
|
|
@ -114,7 +115,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -232,7 +233,7 @@ class FineStoriesComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,8 +48,8 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1])
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
self.section=self.parsedUrl.path.split('/',)[1]
|
||||
|
||||
# normalized story URL.
|
||||
if "malfoymanor" in self.parsedUrl.netloc:
|
||||
|
|
@ -100,17 +101,17 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
if "enchant" in self.story.getMetadata('section'):
|
||||
if "enchant" in self.section:
|
||||
loginUrl = 'http://grangerenchanted.com/enchant/user.php?action=login'
|
||||
else:
|
||||
loginUrl = 'http://malfoymanor.grangerenchanted.com/themanor/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -132,7 +133,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -157,7 +158,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -191,7 +192,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -262,7 +263,7 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+a['href']
|
||||
series_url = 'http://'+self.host+'/'+self.section+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
|
||||
|
|
@ -284,10 +285,10 @@ class GrangerEnchantedCom(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
||||
div = soup.find('div', {'id' : 'story1'})
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -41,7 +42,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only psid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -72,7 +73,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -181,7 +182,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
## most adapters use BeautifulStoneSoup here, but non-Stone
|
||||
## allows nested div tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -76,7 +77,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -215,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -51,7 +52,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /eff part. Replace all to remove it usually.
|
||||
|
|
@ -79,7 +80,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -180,21 +181,22 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
|
|||
value = td.nextSibling.string
|
||||
#print("\nlabel:%s\nvalue:%s\n"%(label,value))
|
||||
|
||||
if 'Category' in label:
|
||||
if 'Category' in label and value:
|
||||
cats = td.parent.findAll('a',href=re.compile(r'categories.php'))
|
||||
catstext = [cat.string for cat in cats]
|
||||
for cat in catstext:
|
||||
self.story.addToList('category',cat.string)
|
||||
|
||||
if 'Characters' in label:
|
||||
if 'Characters' in label and value: # this site can have Character label with no
|
||||
# values, apparently. Others as a precaution.
|
||||
for char in value.split(','):
|
||||
self.story.addToList('characters',char.strip())
|
||||
|
||||
if 'Genre' in label:
|
||||
if 'Genre' in label and value:
|
||||
for genre in value.split(','):
|
||||
self.story.addToList('genre',genre.strip())
|
||||
|
||||
if 'Warnings' in label:
|
||||
if 'Warnings' in label and value:
|
||||
for warning in value.split(','):
|
||||
if warning.strip() != 'none':
|
||||
self.story.addToList('warnings',warning.strip())
|
||||
|
|
@ -208,7 +210,7 @@ class HPFandomNetAdapterAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# There's no good wrapper around the chapter text. :-/
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -76,7 +77,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -202,7 +203,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,13 +93,13 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -120,7 +121,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -158,7 +159,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -267,7 +268,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -51,7 +52,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
|
|
@ -99,7 +100,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -132,7 +133,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -294,7 +295,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
soup = bs.BeautifulStoneSoup(data,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -84,7 +85,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -104,7 +105,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -234,7 +235,7 @@ class LibraryOfMoriaComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -86,7 +87,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -221,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -42,7 +43,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
|
|
@ -66,7 +67,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -188,7 +189,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data=self._fetchUrl(url)
|
||||
soup = bs.BeautifulStoneSoup(data,
|
||||
|
|
@ -214,7 +215,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
return self.utf8FromSoup(url,anchor)
|
||||
|
||||
else:
|
||||
logging.debug('Using kludgey text find for older mediaminer story.')
|
||||
logger.debug('Using kludgey text find for older mediaminer story.')
|
||||
## Some older mediaminer stories are unparsable with BeautifulSoup.
|
||||
## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first.
|
||||
## Story stuff falls between:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,13 +93,13 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -120,7 +121,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -145,7 +146,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -277,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -51,7 +52,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
|
|
@ -95,7 +96,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -128,7 +129,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -269,7 +270,7 @@ class MidnightwhispersCaAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
soup = bs.BeautifulStoneSoup(data,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -51,7 +52,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -96,13 +97,13 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login&sid='+self.story.getMetadata('storyId')
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -125,7 +126,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -164,7 +165,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -315,7 +316,7 @@ class MuggleNetComAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only storyid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -80,7 +81,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -177,7 +178,7 @@ class NationalLibraryNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only storyid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?storyid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -80,7 +81,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -184,7 +185,7 @@ class NCISFicComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("http://"+self.getSiteDomain()\
|
||||
|
|
@ -78,7 +79,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -187,7 +188,7 @@ class NCISFictionComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -51,7 +52,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
|
|
@ -99,7 +100,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -132,7 +133,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -273,7 +274,7 @@ class NfaCommunityComAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -74,7 +75,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
|
|||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -197,7 +198,7 @@ class NHAMagicalWorldsUsAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
|
|
@ -94,13 +95,13 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Logout" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -112,7 +113,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -245,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = data.replace('<div align="left"', '<div align="left">')
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -97,13 +98,13 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -125,7 +126,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -253,7 +254,7 @@ class OneDirectionFanfictionComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2, urllib, cookielib
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/')
|
||||
|
|
@ -90,12 +91,12 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
params['login'] = 'Login'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['txtusername']))
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if 'Please login to continue.' in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['txtusername']))
|
||||
raise exceptions.FailedToLogin(url,params['txtusername'])
|
||||
return False
|
||||
|
|
@ -108,7 +109,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -208,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,8 +48,8 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logging.info(self.parsedUrl.netloc)
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.info(self.parsedUrl.netloc)
|
||||
# normalized story URL.
|
||||
if "explicit" in self.parsedUrl.netloc:
|
||||
self._setURL('http://explicit.' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -91,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -112,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -234,7 +235,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import cookielib as cl
|
||||
|
|
@ -54,7 +55,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/story/'+self.story.getMetadata('storyId'))
|
||||
|
|
@ -81,7 +82,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# portkey screws around with using a different URL to set the
|
||||
# cookie and it's a pain. So... cheat!
|
||||
|
|
@ -247,7 +248,7 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
data = self._fetchUrl(url)
|
||||
soup = bs.BeautifulStoneSoup(data,
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -40,7 +41,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -63,7 +64,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -191,7 +192,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -89,7 +90,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -109,7 +110,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -237,7 +238,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -76,7 +77,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -202,7 +203,7 @@ class PrisonBreakFicNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
262
fanficdownloader/adapters/adapter_qafficcom.py
Normal file
262
fanficdownloader/adapters/adapter_qafficcom.py
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2012 Fanficdownloader team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
from .. import BeautifulSoup as bs
|
||||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
def getClass():
    """Return the adapter class implemented by this module."""
    return QafFicComAdapter
|
||||
|
||||
# Class name has to be unique. Our convention is camel case the
|
||||
# sitename with Adapter at the end. www is skipped.
|
||||
class QafFicComAdapter(BaseSiteAdapter):
    """Site adapter for stories hosted under /atp/ on www.qaf-fic.com.

    The story page supplies title, author and the chapter list; the
    remaining metadata (summary, rating, dates, ...) is read from the
    story's entry on the author's page.
    """

    def __init__(self, config, url):
        """Set up decoding, the normalized story URL and site constants.

        config and url are handed unchanged to BaseSiteAdapter.__init__.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["Windows-1252",
                       "utf8"] # 1252 is a superset of iso-8859-1.
                               # Most sites that claim to be
                               # iso-8859-1 (and some that claim to be
                               # utf8) are really windows-1252.
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult = False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId', self.parsedUrl.query.split('=',)[1])
        logger.debug("storyId: (%s)" % self.story.getMetadata('storyId'))

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'atp')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        """Return the site domain, including the www prefix it uses."""
        return 'www.qaf-fic.com'

    def getSiteExampleURLs(self):
        """Return an example story URL for this site."""
        return "http://" + self.getSiteDomain() + "/atp/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        """Return the regex source that a valid story URL must match."""
        return re.escape("http://" + self.getSiteDomain() + "/atp/viewstory.php?sid=") + r"\d+$"

    def _fetchStoryPage(self, url):
        """Fetch url, turning an HTTP 404 into StoryDoesNotExist.

        Any other HTTPError is re-raised unchanged.
        """
        try:
            return self._fetchUrl(url)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            raise

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Populate self.story metadata and self.chapterUrls.

        Raises:
            exceptions.StoryDoesNotExist: the story page returned 404.
            exceptions.AdultCheckRequired: the site shows a warning
                link and neither is_adult flag is set.
            exceptions.FailedToDownload: the site reports the story as
                not validated.
        """
        adult_ok = self.is_adult or self.getConfig("is_adult")

        if adult_ok:
            # Weirdly, different sites use different warning numbers.
            # If the title search below fails, there's a good chance
            # you need a different number.  print data at that point
            # and see what the 'click here to continue' url says.
            addurl = "&warning=NC-17"
        else:
            addurl = ""

        url = self.url + addurl
        logger.debug("URL: " + url)
        data = self._fetchStoryPage(url)

        # The 'continue' link may be written with '&' or '&amp;', and
        # the warning value is text such as NC-17 rather than
        # whitespace, so accept anything up to the closing quote.
        # NOTE(review): pattern widened from 'warning=\s+' -- confirm
        # against a live warning page.
        m = re.search(r"'viewstory.php\?sid=\d+((?:(?:&amp;|&)ageconsent=ok)?(?:&amp;|&)warning=[^']+)'", data)
        if m is not None:
            if not adult_ok:
                raise exceptions.AdultCheckRequired(self.url)
            # We tried the default and still got a warning, so
            # let's pull the warning number from the 'continue'
            # link and reload data.
            # correct stupid &amp; error in url.
            addurl = m.group(1).replace("&amp;", "&")
            url = self.url + addurl
            logger.debug("URL 2nd try: " + url)
            data = self._fetchStoryPage(url)

        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = bs.BeautifulSoup(data)

        # Now go hunting for all the meta data and the chapter list.

        ## Title and author
        pagetitle = soup.find('div', {'id' : 'pagetitle'})

        aut = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId', aut['href'].split('=')[1])
        self.story.setMetadata('authorUrl', 'http://' + self.host + '/atp/' + aut['href'])
        self.story.setMetadata('author', aut.string)
        aut.extract()

        # With the author link removed, the remaining text is the title
        # followed by three trailing characters that are sliced off.
        self.story.setMetadata('title', pagetitle.string[:(len(pagetitle.string)-3)])

        # Find the chapters:
        chapters = soup.find('select')
        if chapters is not None:
            for chapter in chapters.findAll('option'):
                # just in case there's tags, like <i> in chapter titles.
                self.chapterUrls.append((stripHTML(chapter), 'http://' + self.host + '/atp/viewstory.php?sid=' + self.story.getMetadata('storyId') + '&chapter=' + chapter['value']))
        else:
            # No chapter selector: treat the story page as the only chapter.
            self.chapterUrls.append((self.story.getMetadata('title'), url))

        self.story.setMetadata('numChapters', len(self.chapterUrls))

        # The rest of the metadata is read from this story's listing
        # on the author's page.
        asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')))
        story_href = 'viewstory.php?sid=' + self.story.getMetadata('storyId')
        # (?!\d) stops sid=12 from matching the listing for sid=123.
        story_href_re = re.compile(re.escape(story_href) + r'(?!\d)')
        storybox = None
        for listbox in asoup.findAll('div', {'class' : re.compile(r'listbox\s+')}):
            link = listbox.find('a')
            if link is not None and story_href_re.search(link.get('href', '')):
                storybox = listbox
                break

        if storybox is None:
            # Don't read metadata from some other story's listing.
            logger.warning("Story listing not found on author page: %s",
                           self.story.getMetadata('authorUrl'))
            return

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method
        def defaultGetattr(d, k):
            # d may be a tag, a text node or None; anything that can't
            # be indexed by k counts as "no value".
            try:
                return d[k]
            except Exception:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        labels = storybox.findAll('span', {'class' : 'classification'})
        for labelspan in labels:
            # .string is None when the span holds more than one node.
            label = labelspan.string or ""
            value = labelspan.nextSibling

            if 'Summary' in label:
                ## Everything until the next span class='classification'
                svalue = ""
                while value is not None and defaultGetattr(value, 'class') != 'classification':
                    if "Featured Stories" not in value:
                        svalue += str(value)
                    value = value.nextSibling
                self.setDescription(url, svalue)

            if 'Rated' in label:
                # drops the last two characters of the text after the label
                self.story.setMetadata('rating', value[:len(value)-2])

            if 'Word count' in label:
                self.story.setMetadata('numWords', value)

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a', href=re.compile(r'categories.php\?catid=\d+'))
                for cat in cats:
                    self.story.addToList('category', cat.string)

            if 'Characters' in label:
                for char in value.string.split(', '):
                    if 'None' not in char:
                        self.story.addToList('characters', char)

            if 'Genre' in label:
                for genre in value.string.split(', '):
                    if 'None' not in genre:
                        self.story.addToList('genre', genre)

            if 'Warnings' in label:
                for warning in value.string.split(', '):
                    if 'None' not in warning:
                        self.story.addToList('warnings', warning)

            if 'Completed' in label:
                if 'Yes' in value:
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' ::')[0]), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            if storybox.find('a', href=re.compile(r"series.php")) is not None:
                for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")):
                    # Find Series name from series URL.
                    series_url = 'http://' + self.host + '/atp/' + series['href']
                    # use BeautifulSoup HTML parser to make everything easier to find.
                    seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
                    storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
                    found = False
                    # The story's 1-based position among the series'
                    # story links is used as its series index.
                    for index, storya in enumerate(storyas, 1):
                        if storya['href'] == story_href:
                            name = seriessoup.find('div', {'id' : 'pagetitle'})
                            name.find('a').extract()
                            self.setSeries(name.text.split(' by[')[0], index)
                            found = True
                            break
                    if found:
                        break
        except Exception:
            # Series parsing is best-effort: a failure here must not
            # abort the download, but leave a trace for debugging.
            logger.debug("Series parsing failed", exc_info=True)

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Return the chapter text found in the page at url.

        Raises exceptions.FailedToDownload when the page has no
        <div id="story"> element.
        """
        logger.debug('Getting chapter text from: %s' % url)

        soup = bs.BeautifulSoup(self._fetchUrl(url),
                                selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

        div = soup.find('div', {'id' : 'story'})

        if div is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url, div)
|
||||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -80,7 +81,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -216,7 +217,7 @@ class SamDeanArchiveNuAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -94,7 +95,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -114,7 +115,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -231,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,11 +48,11 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1])
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
self.section=self.parsedUrl.path.split('/',)[1]
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/'+self.story.getMetadata('section')+'/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('http://' + self.getSiteDomain() + '/'+self.section+'/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','sghp')
|
||||
|
|
@ -59,7 +60,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
# If all stories from the site fall into the same category,
|
||||
# the site itself isn't likely to label them as such, so we
|
||||
# do. Can't use extracategories, could be Atlantis or SG-1
|
||||
if 'atlantis' in self.story.getMetadata('section'):
|
||||
if 'atlantis' in self.section:
|
||||
self.story.addToList("category","Stargate: Atlantis")
|
||||
else:
|
||||
self.story.addToList("category","Stargate: SG-1")
|
||||
|
|
@ -95,7 +96,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -115,7 +116,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
# correct stupid & error in url.
|
||||
addurl = addurl.replace("&","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -149,7 +150,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.section+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -220,7 +221,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+self.story.getMetadata('section')+'/'+a['href']
|
||||
series_url = 'http://'+self.host+'/'+self.section+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
|
||||
|
|
@ -242,7 +243,7 @@ class SG1HeliopolisComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -85,7 +86,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -105,7 +106,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -235,7 +236,7 @@ class SinfulDesireOrgAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -82,7 +83,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
# sites skip that for one-chapter stories.
|
||||
# Except it doesn't this time. :-/
|
||||
url = self.url #+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -224,7 +225,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
# soup = bs.BeautifulSoup(self._fetchUrl(url))
|
||||
# BeautifulSoup objects to <p> inside <span>, which
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -58,7 +59,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/peja/cgi-bin/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -90,7 +91,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -223,7 +224,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -76,7 +77,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -213,7 +214,7 @@ class StargateAtlantisOrgAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/chapterlistview.asp?SID='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -76,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -133,7 +134,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,13 +93,13 @@ class SVUFictionComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -120,7 +121,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -145,7 +146,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -258,7 +259,7 @@ class SVUFictionComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -43,7 +44,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -81,13 +82,13 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -102,7 +103,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
|
|||
addurl=""
|
||||
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -116,7 +117,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
|
|||
# need to log in for this one.
|
||||
addurl = "&ageconsent=ok&warning=4"
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("Changing URL: "+url)
|
||||
logger.debug("Changing URL: "+url)
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
|
|
@ -229,7 +230,7 @@ class TenhawkPresentsComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
import datetime
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .. import BeautifulSoup as bs
|
||||
from .. import exceptions
|
||||
|
|
@ -48,7 +49,7 @@ class TestSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")):
|
||||
logging.warn("self.is_adult:%s"%self.is_adult)
|
||||
logger.warn("self.is_adult:%s"%self.is_adult)
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
if self.story.getMetadata('storyId') == '666':
|
||||
|
|
@ -128,7 +129,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
|
|||
|
||||
self.story.addToList('genre','Fantasy')
|
||||
self.story.addToList('genre','Comedy')
|
||||
self.story.addToList('genre','SF')
|
||||
self.story.addToList('genre','Sci-Fi')
|
||||
self.story.addToList('genre','Noir')
|
||||
|
||||
self.story.addToList('characters','Bob Smith')
|
||||
|
|
@ -184,7 +185,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
|
|||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
if self.story.getMetadata('storyId') == '667':
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -76,7 +77,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -198,7 +199,7 @@ class TheAlphaGateComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -80,7 +81,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -182,7 +183,7 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -71,7 +72,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Usually, replace all occurrences to remove it.
|
||||
|
|
@ -117,13 +118,13 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/CriminalMinds/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -145,7 +146,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -292,7 +293,7 @@ class TheHookupZoneNetAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,20 +48,20 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
if self.parsedUrl.path.split('/',)[1] == 'wiktt':
|
||||
self.story.addToList("category","Harry Potter")
|
||||
self.story.setMetadata('section','/wiktt/efiction/')
|
||||
self.section='/wiktt/efiction/'
|
||||
self.dateformat = "%m/%d/%Y"
|
||||
else:
|
||||
self.story.addToList("category","Originals")
|
||||
self.story.setMetadata('section','/efiction/')
|
||||
self.section='/efiction/'
|
||||
self.dateformat = "%b %d, %Y"
|
||||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('http://' + self.getSiteDomain() + self.section + 'viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','msq')
|
||||
|
|
@ -98,14 +99,14 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + self.story.getMetadata('section') + 'user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
loginUrl = 'http://' + self.getSiteDomain() + self.section + 'user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -127,7 +128,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -152,7 +153,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -186,7 +187,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.story.getMetadata('section') + chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host + self.section + chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -258,7 +259,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId') +'&i=1')
|
||||
|
|
@ -91,13 +92,13 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "My Account Page" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -110,7 +111,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -223,7 +224,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -51,7 +52,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the part. Replace all to remove it usually.
|
||||
|
|
@ -101,13 +102,13 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -129,7 +130,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -272,7 +273,7 @@ class TheQuidditchPitchOrgAdapter(BaseSiteAdapter): # XXX
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -43,7 +44,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -81,13 +82,13 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -102,7 +103,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
|
|||
addurl=""
|
||||
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -235,7 +236,7 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
|
|
@ -40,7 +41,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
# normalized story URL.
|
||||
self._setURL("http://"+self.getSiteDomain()\
|
||||
+"/Story-"+self.story.getMetadata('storyId'))
|
||||
|
|
@ -81,7 +82,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
return
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['urealname']))
|
||||
|
||||
## need to pull empty login page first to get ctkn and
|
||||
|
|
@ -98,7 +99,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Stories Published" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -110,7 +111,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
# metadata and chapter list
|
||||
|
||||
url=self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# tth won't send you future updates if you aren't 'caught up'
|
||||
# on the story. Login isn't required for F21, but logging in will
|
||||
|
|
@ -137,7 +138,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
form = soup.find('form', {'id':'sitemaxratingform'})
|
||||
params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
|
||||
'sitemaxrating':'5'}
|
||||
logging.info("Attempting to get rating cookie for %s" % url)
|
||||
logger.info("Attempting to get rating cookie for %s" % url)
|
||||
data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
|
||||
# refetch story page.
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -158,7 +159,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
try:
|
||||
# going to pull part of the meta data from author list page.
|
||||
infourl = 'http://'+self.host+ainfo['href']
|
||||
logging.debug("**StoryInfo** URL: "+infourl)
|
||||
logger.debug("**StoryInfo** URL: "+infourl)
|
||||
infodata = self._fetchUrl(infourl)
|
||||
infosoup = bs.BeautifulSoup(infodata)
|
||||
|
||||
|
|
@ -175,14 +176,14 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
try:
|
||||
# going to pull part of the meta data from *primary* author list page.
|
||||
logging.debug("**AUTHOR** URL: "+authorurl)
|
||||
logger.debug("**AUTHOR** URL: "+authorurl)
|
||||
authordata = self._fetchUrl(authorurl)
|
||||
descurl=authorurl
|
||||
authorsoup = bs.BeautifulSoup(authordata)
|
||||
# author can have several pages, scan until we find it.
|
||||
while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ):
|
||||
nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href']
|
||||
logging.debug("**AUTHOR** nextpage URL: "+nextpage)
|
||||
logger.debug("**AUTHOR** nextpage URL: "+nextpage)
|
||||
authordata = self._fetchUrl(nextpage)
|
||||
descurl=nextpage
|
||||
authorsoup = bs.BeautifulSoup(authordata)
|
||||
|
|
@ -259,7 +260,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
def getChapterText(self, url):
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'storyinnerbody'})
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url path--normalized form is /read/1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL. http://www.twilightarchives.com/read/9353
|
||||
self._setURL('http://' + self.getSiteDomain() + '/read/'+self.story.getMetadata('storyId'))
|
||||
|
|
@ -77,7 +78,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -172,7 +173,7 @@ class TwilightArchivesComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -42,7 +43,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -83,13 +84,13 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -99,7 +100,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -225,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -43,7 +44,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -84,13 +85,13 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -109,7 +110,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
addurl=""
|
||||
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -252,7 +253,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -45,7 +46,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -84,7 +85,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -215,7 +216,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -56,7 +57,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
# - get chapter list, if not one-shot.
|
||||
|
||||
url = self.url+'&chapter=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
|
|
@ -69,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# pull title(title) and author from the HTML title.
|
||||
title = soup.find('title').string
|
||||
logging.debug('Title: %s' % title)
|
||||
logger.debug('Title: %s' % title)
|
||||
title = title.split('::')[1].strip()
|
||||
self.story.setMetadata('title',title.split(' by ')[0].strip())
|
||||
self.story.setMetadata('author',title.split(' by ')[1].strip())
|
||||
|
|
@ -109,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
# <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font>
|
||||
# </td></tr></table>
|
||||
|
||||
logging.debug("Author URL: "+self.story.getMetadata('authorUrl'))
|
||||
logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
|
||||
selfClosingTags=('br')) # normalize <br> tags to <br />
|
||||
# find this story in the list, parse its metadata based on
|
||||
|
|
@ -212,7 +213,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,13 +93,13 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
logger.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
|
|
@ -120,7 +121,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -145,7 +146,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
|
|||
# correct stupid &amp; error in url.
|
||||
addurl = addurl.replace("&amp;","&")
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -286,7 +287,7 @@ class WizardTalesNetAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -47,7 +48,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -77,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -203,7 +204,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -86,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -209,7 +210,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulSoup(self._fetchUrl(url))
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import time
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
|
|
@ -54,7 +55,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
|
|||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
|
@ -92,7 +93,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
|
|||
# index=1 makes sure we see the story chapter index. Some
|
||||
# sites skip that for one-chapter stories.
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL: "+url)
|
||||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -126,7 +127,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
|
|||
# explicitly put ageconsent because google appengine regexp doesn't include it for some reason.
|
||||
addurl = addurl.replace("&amp;","&")+'&ageconsent=ok'
|
||||
url = self.url+'&index=1'+addurl
|
||||
logging.debug("URL 2nd try: "+url)
|
||||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
|
|
@ -147,7 +148,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
|
|||
# while len(loopdata) > 0:
|
||||
# if len(loopdata) < 5000:
|
||||
# chklen = len(loopdata)
|
||||
# logging.info("loopdata: %s" % loopdata[:chklen])
|
||||
# logger.info("loopdata: %s" % loopdata[:chklen])
|
||||
# loopdata = loopdata[chklen:]
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
|
|
@ -270,7 +271,7 @@ class YourFanfictionComAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ from functools import partial
|
|||
from .. import BeautifulSoup as bs
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from google.appengine.api import apiproxy_stub_map
|
||||
def urlfetch_timeout_hook(service, call, request, response):
|
||||
|
|
@ -38,10 +40,10 @@ try:
|
|||
|
||||
apiproxy_stub_map.apiproxy.GetPreCallHooks().Append(
|
||||
'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch')
|
||||
logging.info("Hook to make default deadline 10.0 installed.")
|
||||
logger.info("Hook to make default deadline 10.0 installed.")
|
||||
except:
|
||||
pass
|
||||
#logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
|
||||
#logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
|
||||
|
||||
from ..story import Story
|
||||
from ..gziphttp import GZipProcessor
|
||||
|
|
@ -125,7 +127,7 @@ class BaseSiteAdapter(Configurable):
|
|||
#print code
|
||||
if code == "auto":
|
||||
if not chardet:
|
||||
logging.info("chardet not available, skipping 'auto' encoding")
|
||||
logger.info("chardet not available, skipping 'auto' encoding")
|
||||
continue
|
||||
detected = chardet.detect(data)
|
||||
#print detected
|
||||
|
|
@ -133,12 +135,11 @@ class BaseSiteAdapter(Configurable):
|
|||
code=detected['encoding']
|
||||
else:
|
||||
continue
|
||||
logging.debug("try code:"+code)
|
||||
return data.decode(code)
|
||||
except:
|
||||
logging.debug("code failed:"+code)
|
||||
logger.debug("code failed:"+code)
|
||||
pass
|
||||
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
|
||||
logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
|
||||
return "".join([x for x in data if ord(x) < 128])
|
||||
|
||||
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
|
||||
|
|
@ -175,10 +176,10 @@ class BaseSiteAdapter(Configurable):
|
|||
return self._decode(self._fetchUrlRaw(url,parameters))
|
||||
except Exception, e:
|
||||
excpt=e
|
||||
logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
|
||||
logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
|
||||
|
||||
logging.error("Giving up on %s" %url)
|
||||
logging.exception(excpt)
|
||||
logger.error("Giving up on %s" %url)
|
||||
logger.exception(excpt)
|
||||
raise(excpt)
|
||||
|
||||
# Limit chapters to download. Input starts at 1, list starts at 0
|
||||
|
|
@ -304,7 +305,7 @@ class BaseSiteAdapter(Configurable):
|
|||
if not fetch:
|
||||
fetch=self._fetchUrlRaw
|
||||
|
||||
acceptable_attributes = ['href','name']
|
||||
acceptable_attributes = ['href','name','class','id']
|
||||
#print("include_images:"+self.getConfig('include_images'))
|
||||
if self.getConfig('include_images'):
|
||||
acceptable_attributes.extend(('src','alt','longdesc'))
|
||||
|
|
@ -356,7 +357,6 @@ class BaseSiteAdapter(Configurable):
|
|||
|
||||
def cachedfetch(realfetch,cache,url):
|
||||
if url in cache:
|
||||
print("cache hit")
|
||||
return cache[url]
|
||||
else:
|
||||
return realfetch(url)
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ class Configurable(object):
|
|||
return self.configuration.hasConfig(key)
|
||||
|
||||
def getConfig(self, key, default=""):
|
||||
return self.configuration.getConfig(key)
|
||||
return self.configuration.getConfig(key,default)
|
||||
|
||||
def getConfigList(self, key):
|
||||
return self.configuration.getConfigList(key)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ import urlparse
|
|||
import string
|
||||
from math import floor
|
||||
from functools import partial
|
||||
import logging
|
||||
import urlparse as up
|
||||
|
||||
import exceptions
|
||||
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
|
||||
|
|
@ -52,7 +54,7 @@ try:
|
|||
if export:
|
||||
return (img.export('JPG'),'jpg','image/jpeg')
|
||||
else:
|
||||
print("image used unchanged")
|
||||
logging.debug("image used unchanged")
|
||||
return (data,'jpg','image/jpeg')
|
||||
|
||||
except:
|
||||
|
|
@ -88,23 +90,34 @@ except:
|
|||
img.save(outsio,'JPEG')
|
||||
return (outsio.getvalue(),'jpg','image/jpeg')
|
||||
else:
|
||||
print("image used unchanged")
|
||||
logging.debug("image used unchanged")
|
||||
return (data,'jpg','image/jpeg')
|
||||
|
||||
except:
|
||||
|
||||
# No calibre or PIL, simple pass through with mimetype.
|
||||
imagetypes = {
|
||||
'jpg':'image/jpeg',
|
||||
'jpeg':'image/jpeg',
|
||||
'png':'image/png',
|
||||
'gif':'image/gif',
|
||||
'svg':'image/svg+xml',
|
||||
}
|
||||
|
||||
def convert_image(url,data,sizes,grayscale):
|
||||
ext=url[url.rfind('.')+1:].lower()
|
||||
return (data,ext,imagetypes[ext])
|
||||
return no_convert_image(url,data)
|
||||
|
||||
imagetypes = {
|
||||
'jpg':'image/jpeg',
|
||||
'jpeg':'image/jpeg',
|
||||
'png':'image/png',
|
||||
'gif':'image/gif',
|
||||
'svg':'image/svg+xml',
|
||||
}
|
||||
|
||||
## also used for explicit no image processing.
|
||||
def no_convert_image(url,data):
|
||||
parsedUrl = up.urlparse(url)
|
||||
|
||||
ext=parsedUrl.path[parsedUrl.path.rfind('.')+1:].lower()
|
||||
|
||||
if ext not in imagetypes:
|
||||
logging.debug("no_convert_image url:%s - no known extension"%url)
|
||||
# doesn't have extension? use jpg.
|
||||
ext='jpg'
|
||||
|
||||
return (data,ext,imagetypes[ext])
|
||||
|
||||
def normalize_format_name(fmt):
|
||||
if fmt:
|
||||
|
|
@ -240,24 +253,35 @@ class Story(Configurable):
|
|||
## Three-part lines affect only the listed key(s).
|
||||
## pattern=>replacement
|
||||
## metakey,metakey=>pattern=>replacement
|
||||
## *Five*-part lines take effect only when the trailing conditional key=>regexp matches.
|
||||
## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
|
||||
def setReplace(self,replace):
|
||||
for line in replace.splitlines():
|
||||
if "&&" in line:
|
||||
(line,conditional) = map( lambda x: x.strip(), line.split("&&") )
|
||||
condparts = map( lambda x: x.strip(), conditional.split("=>") )
|
||||
else:
|
||||
condparts=[None,None]
|
||||
if "=>" in line:
|
||||
parts = map( lambda x: x.strip(), line.split("=>") )
|
||||
if len(parts) > 2:
|
||||
parts[0] = map( lambda x: x.strip(), parts[0].split(",") )
|
||||
self.replacements.append(parts)
|
||||
self.replacements.append(parts+condparts)
|
||||
else:
|
||||
self.replacements.append([None]+parts)
|
||||
self.replacements.append([None]+parts+condparts)
|
||||
|
||||
def doReplacments(self,value,key):
|
||||
for (keys,p,v) in self.replacements:
|
||||
for (keys,regexp,replacement,condkey,condregexp) in self.replacements:
|
||||
if (keys == None or key in keys) \
|
||||
and isinstance(value,basestring) \
|
||||
and re.search(p,value):
|
||||
#pv=value
|
||||
value = re.sub(p,v,value)
|
||||
#print("change:%s => %s === %s => %s "%(p,v,pv,value))
|
||||
and re.search(regexp,value):
|
||||
doreplace=True
|
||||
if condkey:
|
||||
condval = self.getMetadata(condkey)
|
||||
doreplace = condval != None and re.search(condregexp,condval)
|
||||
|
||||
if doreplace:
|
||||
value = re.sub(regexp,replacement,value)
|
||||
return value
|
||||
|
||||
def getMetadataRaw(self,key):
|
||||
|
|
@ -280,7 +304,9 @@ class Story(Configurable):
|
|||
value = commaGroups(value)
|
||||
if key == "numChapters":
|
||||
value = commaGroups("%d"%value)
|
||||
if key in ("dateCreated","datePublished","dateUpdated"):
|
||||
if key in ("dateCreated"):
|
||||
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S"))
|
||||
if key in ("datePublished","dateUpdated"):
|
||||
value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
|
||||
|
||||
if doreplacements:
|
||||
|
|
@ -411,11 +437,14 @@ class Story(Configurable):
|
|||
title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title)
|
||||
self.chapters.append( (title,html) )
|
||||
|
||||
def getChapters(self):
|
||||
def getChapters(self,fortoc=False):
|
||||
"Chapters will be tuples of (title,html)"
|
||||
retval = []
|
||||
if self.getConfig('add_chapter_numbers') and \
|
||||
self.getConfig('chapter_title_add_pattern'):
|
||||
## only add numbers if more than one chapter.
|
||||
if len(self.chapters) > 1 and \
|
||||
(self.getConfig('add_chapter_numbers') == "true" \
|
||||
or (self.getConfig('add_chapter_numbers') == "toconly" and fortoc)) \
|
||||
and self.getConfig('chapter_title_add_pattern'):
|
||||
for index, (title,html) in enumerate(self.chapters):
|
||||
retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
|
||||
else:
|
||||
|
|
@ -480,17 +509,22 @@ class Story(Configurable):
|
|||
prefix='ffdl'
|
||||
if imgurl not in self.imgurls:
|
||||
parsedUrl = urlparse.urlparse(imgurl)
|
||||
|
||||
try:
|
||||
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
|
||||
if self.getConfig('no_image_processing'):
|
||||
(data,ext,mime) = no_convert_image(imgurl,
|
||||
fetch(imgurl))
|
||||
else:
|
||||
try:
|
||||
sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
|
||||
except Exception, e:
|
||||
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
|
||||
(data,ext,mime) = convert_image(imgurl,
|
||||
fetch(imgurl),
|
||||
sizes,
|
||||
self.getConfig('grayscale_images'))
|
||||
except Exception, e:
|
||||
raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
|
||||
try:
|
||||
(data,ext,mime) = convert_image(imgurl,
|
||||
fetch(imgurl),
|
||||
sizes,
|
||||
self.getConfig('grayscale_images'))
|
||||
except Exception, e:
|
||||
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
|
||||
logging.info("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
|
||||
return "failedtoload"
|
||||
|
||||
# explicit cover, make the first image.
|
||||
|
|
@ -525,7 +559,7 @@ class Story(Configurable):
|
|||
ext)
|
||||
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
|
||||
|
||||
print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
|
||||
logging.debug("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
|
||||
else:
|
||||
newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc']
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
import re
|
||||
import os.path
|
||||
import datetime
|
||||
import string
|
||||
import StringIO
|
||||
import zipfile
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
|
@ -26,6 +27,8 @@ import logging
|
|||
from ..configurable import Configurable
|
||||
from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class BaseStoryWriter(Configurable):
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -101,6 +104,22 @@ class BaseStoryWriter(Configurable):
|
|||
names as Story.metadata, but ENTRY should use label and value.
|
||||
"""
|
||||
if self.getConfig("include_titlepage"):
|
||||
|
||||
if self.hasConfig("titlepage_start"):
|
||||
START = string.Template(self.getConfig("titlepage_start"))
|
||||
|
||||
if self.hasConfig("titlepage_entry"):
|
||||
ENTRY = string.Template(self.getConfig("titlepage_entry"))
|
||||
|
||||
if self.hasConfig("titlepage_end"):
|
||||
END = string.Template(self.getConfig("titlepage_end"))
|
||||
|
||||
if self.hasConfig("titlepage_wide_entry"):
|
||||
WIDE_ENTRY = string.Template(self.getConfig("titlepage_wide_entry"))
|
||||
|
||||
if self.hasConfig("titlepage_no_title_entry"):
|
||||
NO_TITLE_ENTRY = string.Template(self.getConfig("titlepage_no_title_entry"))
|
||||
|
||||
self._write(out,START.substitute(self.story.getAllMetadata()))
|
||||
|
||||
if WIDE_ENTRY==None:
|
||||
|
|
@ -120,11 +139,11 @@ class BaseStoryWriter(Configurable):
|
|||
if self.hasConfig(entry+"_label"):
|
||||
label=self.getConfig(entry+"_label")
|
||||
elif entry in self.titleLabels:
|
||||
logging.debug("Using fallback label for %s_label"%entry)
|
||||
logger.debug("Using fallback label for %s_label"%entry)
|
||||
label=self.titleLabels[entry]
|
||||
else:
|
||||
label="%s"%entry.title()
|
||||
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
|
||||
logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
|
||||
|
||||
# If the label for the title entry is empty, use the
|
||||
# 'no title' option if there is one.
|
||||
|
|
@ -132,6 +151,7 @@ class BaseStoryWriter(Configurable):
|
|||
TEMPLATE= NO_TITLE_ENTRY
|
||||
|
||||
self._write(out,TEMPLATE.substitute({'label':label,
|
||||
'id':entry,
|
||||
'value':self.story.getMetadata(entry)}))
|
||||
else:
|
||||
self._write(out, entry)
|
||||
|
|
@ -146,11 +166,22 @@ class BaseStoryWriter(Configurable):
|
|||
"""
|
||||
# Only do TOC if there's more than one chapter and it's configured.
|
||||
if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
|
||||
if self.hasConfig("tocpage_start"):
|
||||
START = string.Template(self.getConfig("tocpage_start"))
|
||||
|
||||
if self.hasConfig("tocpage_entry"):
|
||||
ENTRY = string.Template(self.getConfig("tocpage_entry"))
|
||||
|
||||
if self.hasConfig("tocpage_end"):
|
||||
END = string.Template(self.getConfig("tocpage_end"))
|
||||
|
||||
self._write(out,START.substitute(self.story.getAllMetadata()))
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)):
|
||||
if html:
|
||||
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
|
||||
self._write(out,ENTRY.substitute({'chapter':title,
|
||||
'number':index+1,
|
||||
'index':"%04d"%(index+1)}))
|
||||
|
||||
self._write(out,END.substitute(self.story.getAllMetadata()))
|
||||
|
||||
|
|
@ -161,6 +192,8 @@ class BaseStoryWriter(Configurable):
|
|||
if outfilename == None:
|
||||
outfilename=self.getOutputFileName()
|
||||
|
||||
self.outfilename = outfilename
|
||||
|
||||
# minor cheat, tucking css into metadata.
|
||||
if self.getConfig("output_css"):
|
||||
self.story.setMetadata("output_css",
|
||||
|
|
@ -171,11 +204,11 @@ class BaseStoryWriter(Configurable):
|
|||
|
||||
if not outstream:
|
||||
close=True
|
||||
logging.info("Save directly to file: %s" % outfilename)
|
||||
logger.info("Save directly to file: %s" % outfilename)
|
||||
if self.getConfig('make_directories'):
|
||||
path=""
|
||||
dirs = os.path.dirname(outfilename).split('/')
|
||||
for dir in dirs:
|
||||
outputdirs = os.path.dirname(outfilename).split('/')
|
||||
for dir in outputdirs:
|
||||
path+=dir+"/"
|
||||
if not os.path.exists(path):
|
||||
os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2?
|
||||
|
|
@ -198,7 +231,7 @@ class BaseStoryWriter(Configurable):
|
|||
outstream = open(outfilename,"wb")
|
||||
else:
|
||||
close=False
|
||||
logging.debug("Save to stream")
|
||||
logger.debug("Save to stream")
|
||||
|
||||
if not metaonly:
|
||||
self.story = self.adapter.getStory() # get full story now,
|
||||
|
|
@ -209,14 +242,14 @@ class BaseStoryWriter(Configurable):
|
|||
# fetch once.
|
||||
if self.getConfig('zip_output'):
|
||||
out = StringIO.StringIO()
|
||||
self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
|
||||
self.writeStoryImpl(out)
|
||||
zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
|
||||
zipout.writestr(self.getBaseFileName(),out.getvalue())
|
||||
self.zipout.writestr(self.getBaseFileName(),out.getvalue())
|
||||
# declares all the files created by Windows. otherwise, when
|
||||
# it runs in appengine, windows unzips the files as 000 perms.
|
||||
for zf in zipout.filelist:
|
||||
for zf in self.zipout.filelist:
|
||||
zf.create_system = 0
|
||||
zipout.close()
|
||||
self.zipout.close()
|
||||
out.close()
|
||||
else:
|
||||
self.writeStoryImpl(outstream)
|
||||
|
|
@ -224,6 +257,27 @@ class BaseStoryWriter(Configurable):
|
|||
if close:
|
||||
outstream.close()
|
||||
|
||||
def writeFile(self, filename, data):
|
||||
logger.debug("writeFile:%s"%filename)
|
||||
|
||||
if self.getConfig('zip_output'):
|
||||
outputdirs = os.path.dirname(self.getBaseFileName())
|
||||
if outputdirs:
|
||||
filename=outputdirs+'/'+filename
|
||||
self.zipout.writestr(filename,data)
|
||||
else:
|
||||
outputdirs = os.path.dirname(self.outfilename)
|
||||
if outputdirs:
|
||||
filename=outputdirs+'/'+filename
|
||||
|
||||
dir = os.path.dirname(filename)
|
||||
if not os.path.exists(dir):
|
||||
os.mkdir(dir) ## os.makedirs() doesn't work in 2.5.2?
|
||||
|
||||
outstream = open(filename,"wb")
|
||||
outstream.write(data)
|
||||
outstream.close()
|
||||
|
||||
def writeStoryImpl(self, out):
|
||||
"Must be overriden by sub classes."
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ from xml.dom.minidom import parse, parseString, getDOMImplementation
|
|||
from base_writer import *
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EpubWriter(BaseStoryWriter):
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -151,8 +153,16 @@ ${value}<br />
|
|||
<h3>Update Log</h3>
|
||||
''')
|
||||
|
||||
self.EPUB_LOG_UPDATE_START = string.Template('''
|
||||
<p class='log_entry'>
|
||||
''')
|
||||
|
||||
self.EPUB_LOG_ENTRY = string.Template('''
|
||||
<b>${label}:</b> <span id="${id}">${value}</span>
|
||||
''')
|
||||
|
||||
self.EPUB_LOG_UPDATE_END = string.Template('''
|
||||
</p><hr />
|
||||
''')
|
||||
|
||||
self.EPUB_LOG_PAGE_END = string.Template('''
|
||||
|
|
@ -160,30 +170,50 @@ ${value}<br />
|
|||
</html>
|
||||
''')
|
||||
|
||||
self.EPUB_LOG_PAGE_END = string.Template('''
|
||||
</body>
|
||||
</html>
|
||||
''')
|
||||
|
||||
self.EPUB_COVER = string.Template('''
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head><title>Cover</title><style type="text/css" title="override_css">
|
||||
@page {padding: 0pt; margin:0pt}
|
||||
body { text-align: center; padding:0pt; margin: 0pt; }
|
||||
div { margin: 0pt; padding: 0pt; }
|
||||
</style></head><body><div>
|
||||
<img src="${coverimg}" alt="cover"/>
|
||||
</div></body></html>
|
||||
''')
|
||||
|
||||
def writeLogPage(self, out):
|
||||
"""
|
||||
XXX
|
||||
|
||||
|
||||
Write the log page, but only include entries that there's
|
||||
metadata for. START, ENTRY and END are expected to already by
|
||||
metadata for. START, ENTRY and END are expected to already be
|
||||
string.Template(). START and END are expected to use the same
|
||||
names as Story.metadata, but ENTRY should use id, label and value.
|
||||
"""
|
||||
if self.getConfig("include_logpage"):
|
||||
if self.hasConfig("logpage_start"):
|
||||
START = string.Template(self.getConfig("logpage_start"))
|
||||
else:
|
||||
START = self.EPUB_LOG_PAGE_START
|
||||
|
||||
# if there's a self.story.logfile, there's an existing log
|
||||
# to add to.
|
||||
if self.story.logfile:
|
||||
print("existing logfile found, appending")
|
||||
print("existing data:%s"%self._getLastLogData(self.story.logfile))
|
||||
replace_string = "</body>" # "</h3>"
|
||||
self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string))
|
||||
else:
|
||||
# otherwise, write a new one.
|
||||
self._write(out,self.EPUB_LOG_PAGE_START.substitute(self.story.getAllMetadata()))
|
||||
self._write(out,self._makeLogEntry())
|
||||
self._write(out,self.EPUB_LOG_PAGE_END.substitute(self.story.getAllMetadata()))
|
||||
if self.hasConfig("logpage_end"):
|
||||
END = string.Template(self.getConfig("logpage_end"))
|
||||
else:
|
||||
END = self.EPUB_LOG_PAGE_END
|
||||
|
||||
# if there's a self.story.logfile, there's an existing log
|
||||
# to add to.
|
||||
if self.story.logfile:
|
||||
logger.debug("existing logfile found, appending")
|
||||
logger.debug("existing data:%s"%self._getLastLogData(self.story.logfile))
|
||||
replace_string = "</body>" # "</h3>"
|
||||
self._write(out,self.story.logfile.replace(replace_string,self._makeLogEntry(self._getLastLogData(self.story.logfile))+replace_string))
|
||||
else:
|
||||
# otherwise, write a new one.
|
||||
self._write(out,START.substitute(self.story.getAllMetadata()))
|
||||
self._write(out,self._makeLogEntry())
|
||||
self._write(out,END.substitute(self.story.getAllMetadata()))
|
||||
|
||||
# self parsing instead of Soup because it should be simple and not
|
||||
# worth the overhead.
|
||||
|
|
@ -206,7 +236,22 @@ ${value}<br />
|
|||
return values
|
||||
|
||||
def _makeLogEntry(self, oldvalues={}):
|
||||
retval = "<p class='log_entry'>"
|
||||
if self.hasConfig("logpage_update_start"):
|
||||
START = string.Template(self.getConfig("logpage_update_start"))
|
||||
else:
|
||||
START = self.EPUB_LOG_UPDATE_START
|
||||
|
||||
if self.hasConfig("logpage_entry"):
|
||||
ENTRY = string.Template(self.getConfig("logpage_entry"))
|
||||
else:
|
||||
ENTRY = self.EPUB_LOG_ENTRY
|
||||
|
||||
if self.hasConfig("logpage_update_end"):
|
||||
END = string.Template(self.getConfig("logpage_update_end"))
|
||||
else:
|
||||
END = self.EPUB_LOG_UPDATE_END
|
||||
|
||||
retval = START.substitute(self.story.getAllMetadata())
|
||||
|
||||
for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
|
||||
if self.isValidMetaEntry(entry):
|
||||
|
|
@ -215,22 +260,22 @@ ${value}<br />
|
|||
if self.hasConfig(entry+"_label"):
|
||||
label=self.getConfig(entry+"_label")
|
||||
elif entry in self.titleLabels:
|
||||
logging.debug("Using fallback label for %s_label"%entry)
|
||||
logger.debug("Using fallback label for %s_label"%entry)
|
||||
label=self.titleLabels[entry]
|
||||
else:
|
||||
label="%s"%entry.title()
|
||||
logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
|
||||
logger.debug("No known label for %s, fallback to '%s'"%(entry,label))
|
||||
|
||||
retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry,
|
||||
'label':label,
|
||||
'value':val})
|
||||
retval = retval + ENTRY.substitute({'id':entry,
|
||||
'label':label,
|
||||
'value':val})
|
||||
else:
|
||||
# could be useful for introducing extra text, but
|
||||
# mostly it makes it easy to tell when you get the
|
||||
# keyword wrong.
|
||||
retval = retval + entry
|
||||
|
||||
retval = retval + "</p><hr />"
|
||||
retval = retval + END.substitute(self.story.getAllMetadata())
|
||||
|
||||
if self.getConfig('replace_hr'):
|
||||
retval = retval.replace("<hr />","<div class='center'>* * *</div>")
|
||||
|
|
@ -368,9 +413,9 @@ ${value}<br />
|
|||
guide = None
|
||||
coverIO = None
|
||||
|
||||
imgid = "image0000"
|
||||
coverimgid = "image0000"
|
||||
if not self.story.cover and self.story.oldcover:
|
||||
print("writer_epub: no new cover, has old cover, write image.")
|
||||
logger.debug("writer_epub: no new cover, has old cover, write image.")
|
||||
(oldcoverhtmlhref,
|
||||
oldcoverhtmltype,
|
||||
oldcoverhtmldata,
|
||||
|
|
@ -380,8 +425,8 @@ ${value}<br />
|
|||
outputepub.writestr(oldcoverhtmlhref,oldcoverhtmldata)
|
||||
outputepub.writestr(oldcoverimghref,oldcoverimgdata)
|
||||
|
||||
imgid = "image0"
|
||||
items.append((imgid,
|
||||
coverimgid = "image0"
|
||||
items.append((coverimgid,
|
||||
oldcoverimghref,
|
||||
oldcoverimgtype,
|
||||
None))
|
||||
|
|
@ -406,6 +451,10 @@ ${value}<br />
|
|||
imgmap['mime'],
|
||||
None))
|
||||
imgcount+=1
|
||||
if 'cover' in imgfile:
|
||||
# make sure coverimgid is set to the cover, not
|
||||
# just the first image.
|
||||
coverimgid = items[-1][0]
|
||||
|
||||
|
||||
items.append(("style","OEBPS/stylesheet.css","text/css",None))
|
||||
|
|
@ -417,7 +466,7 @@ ${value}<br />
|
|||
itemrefs.append("cover")
|
||||
#
|
||||
# <meta name="cover" content="cover.jpg"/>
|
||||
metadata.appendChild(newTag(contentdom,"meta",{"content":"image0000",
|
||||
metadata.appendChild(newTag(contentdom,"meta",{"content":coverimgid,
|
||||
"name":"cover"}))
|
||||
# cover stuff for later:
|
||||
# at end of <package>:
|
||||
|
|
@ -429,16 +478,12 @@ ${value}<br />
|
|||
"title":"Cover",
|
||||
"href":"OEBPS/cover.xhtml"}))
|
||||
|
||||
if self.hasConfig("cover_content"):
|
||||
COVER = string.Template(self.getConfig("cover_content"))
|
||||
else:
|
||||
COVER = self.EPUB_COVER
|
||||
coverIO = StringIO.StringIO()
|
||||
coverIO.write('''
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head><title>Cover</title><style type="text/css" title="override_css">
|
||||
@page {padding: 0pt; margin:0pt}
|
||||
body { text-align: center; padding:0pt; margin: 0pt; }
|
||||
div { margin: 0pt; padding: 0pt; }
|
||||
</style></head><body><div>
|
||||
<img src="%s" alt="cover"/>
|
||||
</div></body></html>
|
||||
'''%self.story.cover)
|
||||
coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
|
||||
|
||||
if self.getConfig("include_titlepage"):
|
||||
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
|
||||
|
|
@ -447,11 +492,15 @@ div { margin: 0pt; padding: 0pt; }
|
|||
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
|
||||
itemrefs.append("toc_page")
|
||||
|
||||
if self.getConfig("include_logpage"):
|
||||
dologpage = ( self.getConfig("include_logpage") == "smart" and \
|
||||
(self.story.logfile or self.story.getMetadataRaw("status") == "In-Progress") ) \
|
||||
or self.getConfig("include_logpage") == "true"
|
||||
|
||||
if dologpage:
|
||||
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
|
||||
itemrefs.append("log_page")
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
for index, (title,html) in enumerate(self.story.getChapters(fortoc=True)):
|
||||
if html:
|
||||
i=index+1
|
||||
items.append(("file%04d"%i,
|
||||
|
|
@ -483,8 +532,8 @@ div { margin: 0pt; padding: 0pt; }
|
|||
contentxml = contentdom.toxml(encoding='utf-8')
|
||||
|
||||
# tweak for brain damaged Nook STR. Nook insists on name before content.
|
||||
contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%imgid,
|
||||
'<meta name="cover" content="%s"/>'%imgid)
|
||||
contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%coverimgid,
|
||||
'<meta name="cover" content="%s"/>'%coverimgid)
|
||||
outputepub.writestr("content.opf",contentxml)
|
||||
|
||||
contentdom.unlink()
|
||||
|
|
@ -582,17 +631,28 @@ div { margin: 0pt; padding: 0pt; }
|
|||
outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue())
|
||||
tocpageIO.close()
|
||||
|
||||
# write log page.
|
||||
logpageIO = StringIO.StringIO()
|
||||
self.writeLogPage(logpageIO)
|
||||
if logpageIO.getvalue(): # will be false if no log page.
|
||||
if dologpage:
|
||||
# write log page.
|
||||
logpageIO = StringIO.StringIO()
|
||||
self.writeLogPage(logpageIO)
|
||||
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
|
||||
logpageIO.close()
|
||||
logpageIO.close()
|
||||
|
||||
if self.hasConfig('chapter_start'):
|
||||
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
|
||||
else:
|
||||
CHAPTER_START = self.EPUB_CHAPTER_START
|
||||
|
||||
if self.hasConfig('chapter_end'):
|
||||
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
|
||||
else:
|
||||
CHAPTER_END = self.EPUB_CHAPTER_END
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
|
||||
logger.debug('Writing chapter text for: %s' % title)
|
||||
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
|
||||
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
|
||||
# ffnet(& maybe others) gives the whole chapter text
|
||||
# as one line. This causes problems for nook(at
|
||||
# least) when the chapter size starts getting big
|
||||
|
|
|
|||
|
|
@ -46,6 +46,10 @@ ${output_css}
|
|||
<h1><a href="${storyUrl}">${title}</a> by ${authorHTML}</h1>
|
||||
''')
|
||||
|
||||
self.HTML_COVER = string.Template('''
|
||||
<img src="${coverimg}" alt="cover" />
|
||||
''')
|
||||
|
||||
self.HTML_TITLE_PAGE_START = string.Template('''
|
||||
<table class="full">
|
||||
''')
|
||||
|
|
@ -75,6 +79,8 @@ ${output_css}
|
|||
<a name="section${index}"><h2>${chapter}</h2></a>
|
||||
''')
|
||||
|
||||
self.HTML_CHAPTER_END = string.Template('')
|
||||
|
||||
self.HTML_FILE_END = string.Template('''
|
||||
</body>
|
||||
</html>''')
|
||||
|
|
@ -82,8 +88,26 @@ ${output_css}
|
|||
|
||||
def writeStoryImpl(self, out):
|
||||
|
||||
self._write(out,self.HTML_FILE_START.substitute(self.story.getAllMetadata()))
|
||||
if self.hasConfig("cover_content"):
|
||||
COVER = string.Template(self.getConfig("cover_content"))
|
||||
else:
|
||||
COVER = self.HTML_COVER
|
||||
|
||||
if self.hasConfig('file_start'):
|
||||
FILE_START = string.Template(self.getConfig("file_start"))
|
||||
else:
|
||||
FILE_START = self.HTML_FILE_START
|
||||
|
||||
if self.hasConfig('file_end'):
|
||||
FILE_END = string.Template(self.getConfig("file_end"))
|
||||
else:
|
||||
FILE_END = self.HTML_FILE_END
|
||||
|
||||
self._write(out,FILE_START.substitute(self.story.getAllMetadata()))
|
||||
|
||||
if self.getConfig('include_images') and self.story.cover:
|
||||
self._write(out,COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
|
||||
|
||||
self.writeTitlePage(out,
|
||||
self.HTML_TITLE_PAGE_START,
|
||||
self.HTML_TITLE_ENTRY,
|
||||
|
|
@ -94,10 +118,27 @@ ${output_css}
|
|||
self.HTML_TOC_ENTRY,
|
||||
self.HTML_TOC_PAGE_END)
|
||||
|
||||
if self.hasConfig('chapter_start'):
|
||||
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
|
||||
else:
|
||||
CHAPTER_START = self.HTML_CHAPTER_START
|
||||
|
||||
if self.hasConfig('chapter_end'):
|
||||
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
|
||||
else:
|
||||
CHAPTER_END = self.HTML_CHAPTER_END
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
|
||||
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
|
||||
self._write(out,CHAPTER_START.substitute(vals))
|
||||
self._write(out,html)
|
||||
self._write(out,CHAPTER_END.substitute(vals))
|
||||
|
||||
self._write(out,self.HTML_FILE_END.substitute(self.story.getAllMetadata()))
|
||||
self._write(out,FILE_END.substitute(self.story.getAllMetadata()))
|
||||
|
||||
if self.getConfig('include_images'):
|
||||
for imgmap in self.story.getImgUrls():
|
||||
self.writeFile(imgmap['newsrc'],imgmap['data'])
|
||||
|
||||
|
|
|
|||
|
|
@ -88,27 +88,6 @@ ${value}<br />
|
|||
self.MOBI_TABLE_TITLE_PAGE_END = string.Template('''
|
||||
</table>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
''')
|
||||
|
||||
self.MOBI_TOC_PAGE_START = string.Template('''<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>${title} by ${author}</title>
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<h3>Table of Contents</h3>
|
||||
''')
|
||||
|
||||
self.MOBI_TOC_ENTRY = string.Template('''
|
||||
<a href="file${index}.xhtml">${chapter}</a><br />
|
||||
''')
|
||||
|
||||
self.MOBI_TOC_PAGE_END = string.Template('''
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
''')
|
||||
|
|
@ -169,10 +148,21 @@ ${value}<br />
|
|||
# files.append(tocpageIO.getvalue())
|
||||
# tocpageIO.close()
|
||||
|
||||
if self.hasConfig('chapter_start'):
|
||||
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
|
||||
else:
|
||||
CHAPTER_START = self.MOBI_CHAPTER_START
|
||||
|
||||
if self.hasConfig('chapter_end'):
|
||||
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
|
||||
else:
|
||||
CHAPTER_END = self.MOBI_CHAPTER_END
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
|
||||
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
|
||||
fullhtml = CHAPTER_START.substitute(vals) + html + CHAPTER_END.substitute(vals)
|
||||
# ffnet(& maybe others) gives the whole chapter text
|
||||
# as one line. This causes problems for nook(at
|
||||
# least) when the chapter size starts getting big
|
||||
|
|
|
|||
|
|
@ -98,6 +98,7 @@ ${chapter}
|
|||
\t${chapter}
|
||||
|
||||
''')
|
||||
self.TEXT_CHAPTER_END = string.Template(u'')
|
||||
|
||||
self.TEXT_FILE_END = string.Template(u'''
|
||||
|
||||
|
|
@ -114,7 +115,17 @@ End file.
|
|||
|
||||
wrapout = KludgeStringIO()
|
||||
|
||||
wrapout.write(self.TEXT_FILE_START.substitute(self.story.getAllMetadata()))
|
||||
if self.hasConfig("file_start"):
|
||||
FILE_START = string.Template(self.getConfig("file_start"))
|
||||
else:
|
||||
FILE_START = self.TEXT_FILE_START
|
||||
|
||||
if self.hasConfig("file_end"):
|
||||
FILE_END = string.Template(self.getConfig("file_end"))
|
||||
else:
|
||||
FILE_END = self.TEXT_FILE_END
|
||||
|
||||
wrapout.write(FILE_START.substitute(self.story.getAllMetadata()))
|
||||
|
||||
self.writeTitlePage(wrapout,
|
||||
self.TEXT_TITLE_PAGE_START,
|
||||
|
|
@ -133,13 +144,25 @@ End file.
|
|||
|
||||
self._write(out,self.lineends(self.wraplines(towrap)))
|
||||
|
||||
if self.hasConfig('chapter_start'):
|
||||
CHAPTER_START = string.Template(self.getConfig("chapter_start"))
|
||||
else:
|
||||
CHAPTER_START = self.TEXT_CHAPTER_START
|
||||
|
||||
if self.hasConfig('chapter_end'):
|
||||
CHAPTER_END = string.Template(self.getConfig("chapter_end"))
|
||||
else:
|
||||
CHAPTER_END = self.TEXT_CHAPTER_END
|
||||
|
||||
for index, (title,html) in enumerate(self.story.getChapters()):
|
||||
if html:
|
||||
logging.debug('Writing chapter text for: %s' % title)
|
||||
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
|
||||
vals={'chapter':title, 'index':"%04d"%(index+1), 'number':index+1}
|
||||
self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_START.substitute(vals)))))
|
||||
self._write(out,self.lineends(html2text(html,wrap_width=self.wrap_width)))
|
||||
self._write(out,self.lineends(self.wraplines(removeAllEntities(CHAPTER_END.substitute(vals)))))
|
||||
|
||||
self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.getAllMetadata()))))
|
||||
self._write(out,self.lineends(self.wraplines(FILE_END.substitute(self.story.getAllMetadata()))))
|
||||
|
||||
def wraplines(self, text):
|
||||
|
||||
|
|
|
|||
13
index.html
13
index.html
|
|
@ -54,10 +54,6 @@
|
|||
much easier. </p>
|
||||
</div>
|
||||
<!-- put announcements here, h3 is a good title size. -->
|
||||
<h3>New Fixes</h3>
|
||||
<p>
|
||||
New version containing some bug fixes, and a couple metadata features.
|
||||
</p>
|
||||
<p>
|
||||
Questions? Check out our
|
||||
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownloaderFAQs">FAQs</a>.
|
||||
|
|
@ -66,7 +62,7 @@
|
|||
If you have any problems with this application, please
|
||||
report them in
|
||||
the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The
|
||||
<a href="http://4-4-25.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
|
||||
<a href="http://4-4-28.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
|
||||
</p>
|
||||
<div id='error'>
|
||||
{{ error_message }}
|
||||
|
|
@ -543,7 +539,14 @@
|
|||
Use the URL of the story's chapter list, such as
|
||||
<br /><a href="http://thehookupzone.net/CriminalMinds/viewstory.php?sid=1234">http://thehookupzone.net/CriminalMinds/viewstory.php?sid=1234</a>
|
||||
</dd>
|
||||
<dt>www.qaf-fic.com</dt>
|
||||
<dd>
|
||||
Use the URL of the story's chapter list, such as
|
||||
<br /><a href="http://www.qaf-fic.com/atp/viewstory.php?sid=1234">http://www.qaf-fic.com/atp/viewstory.php?sid=1234</a>
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
|
||||
<p>
|
||||
A few additional things to know, which will make your life substantially easier:
|
||||
</p>
|
||||
|
|
|
|||
|
|
@ -131,8 +131,10 @@ extratags: FanFiction
|
|||
## for regexp details.
|
||||
## Make sure to keep at least one space at the start of each line and
|
||||
## to escape % to %%, if used.
|
||||
## Two or three part lines. Two part effect everything.
|
||||
## Two, three or five part lines. Two part lines affect everything.
|
||||
## Three part lines affect only the listed key(s).
|
||||
## *Five* part lines take effect only when the trailing conditional key=>regexp matches
|
||||
## metakey[,metakey]=>pattern=>replacement[&&metakey=>regexp]
|
||||
#replace_metadata:
|
||||
# genre,category=>Sci-Fi=>SF
|
||||
# Puella Magi Madoka Magica.* => Madoka
|
||||
|
|
@ -140,7 +142,9 @@ extratags: FanFiction
|
|||
# Crossover: (.*)=>\1
|
||||
# title=>(.*)Great(.*)=>\1Moderate\2
|
||||
# .*-Centered=>
|
||||
|
||||
# characters=>Sam W\.=>Sam Witwicky&&category=>Transformers
|
||||
# characters=>Sam W\.=>Sam Winchester&&category=>Supernatural
|
||||
|
||||
## Some readers don't show horizontal rule (<hr />) tags correctly.
|
||||
## This replaces them all with a centered '* * *'. (Note centering
|
||||
## doesn't work on some devices either.)
|
||||
|
|
@ -168,10 +172,15 @@ keep_summary_html:true
|
|||
## Don't like the numbers at the start of chapter titles on some
|
||||
## sites? You can use strip_chapter_numbers to strip them off. Just
|
||||
## want to make them all look the same? Strip them off, then add them
|
||||
## back on with add_chapter_numbers. Don't like the way it strips
|
||||
## numbers or adds them back? See chapter_title_strip_pattern and
|
||||
## chapter_title_add_pattern.
|
||||
## back on with add_chapter_numbers:true. Only want them added back
|
||||
## on for Table of Contents(toc)? Use add_chapter_numbers:toconly.
|
||||
## (toconly doesn't work on mobi output.) Don't like the way it
|
||||
## strips numbers or adds them back? See chapter_title_strip_pattern
|
||||
## and chapter_title_add_pattern.
|
||||
strip_chapter_numbers:false
|
||||
|
||||
## add_chapter_numbers can be true, false or toconly
|
||||
## (Note number is not added when there's only one chapter.)
|
||||
add_chapter_numbers:false
|
||||
|
||||
## (Two versions of chapter_title_strip_pattern are shown below. You
|
||||
|
|
@ -202,6 +211,17 @@ chapter_title_add_pattern:${index}. ${title}
|
|||
## Each output format has a section that overrides [defaults]
|
||||
[html]
|
||||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub and html output formats.
|
||||
## include_images is *not* available in the web service in any format.
|
||||
#include_images:false
|
||||
|
||||
## This switch prevents FFDL from doing any processing on the images.
|
||||
## Usually they would be converted to jpg, resized and optionally made
|
||||
## grayscale.
|
||||
no_image_processing: true
|
||||
|
||||
## output background color--only used by html and epub (and ignored in
|
||||
## epub by many readers). Included below in output_css--will be
|
||||
## ignored if not in output_css.
|
||||
|
|
@ -242,13 +262,17 @@ windows_eol: true
|
|||
## mobi generated from epub by calibre will have a TOC at the end.
|
||||
include_tocpage: false
|
||||
|
||||
## include a Update Log page before the story text. If included, the
|
||||
## log will be updated each time the epub is an all the metadata
|
||||
## include an Update Log page before the story text. If 'true', the
|
||||
## log will be updated each time the epub is, and all the metadata
|
||||
## fields that have changed since the last update (typically
|
||||
## dateUpdated,numChapters,numWords at a minimum) will be shown.
|
||||
## Great for tracking when chapters came out and when the description,
|
||||
## etc changed.
|
||||
include_logpage: false
|
||||
## If set to 'smart', logpage will only be included if the story is
|
||||
## status:In-Progress or already had a logpage. That way you don't
|
||||
## end up with Completed stories that have just one logpage entry.
|
||||
#include_logpage: smart
|
||||
|
||||
## items to include in the log page. Empty metadata entries, or those
|
||||
## that haven't changed since the last update, will *not* appear, even
|
||||
|
|
@ -301,6 +325,7 @@ output_css:
|
|||
|
||||
## include images from img tags in the body and summary of
|
||||
## stories. Images will be converted to jpg for size if possible.
|
||||
## include_images is *only* available in epub and html output format.
|
||||
#include_images:false
|
||||
|
||||
## If set, the first image found will be made the cover image. If
|
||||
|
|
@ -355,31 +380,31 @@ nook_img_fix:true
|
|||
## URLs like: http://test1.com?sid=12345
|
||||
[test1.com]
|
||||
extratags: FanFiction,Testing
|
||||
extracategories:Fafner
|
||||
extragenres:Romance,Fluff
|
||||
extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
|
||||
extraships:Smythe-Smythe/Mokona
|
||||
extrawarnings:Extreme Bogosity
|
||||
# extracategories:Fafner
|
||||
# extragenres:Romance,Fluff
|
||||
# extracharacters:Reginald Smythe-Smythe,Mokona,Harry P.
|
||||
# extraships:Smythe-Smythe/Mokona
|
||||
# extrawarnings:Extreme Bogosity
|
||||
|
||||
extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
# extra_valid_entries:metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
|
||||
include_in_compositeJ:dateCreated
|
||||
include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
|
||||
include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
|
||||
# include_in_compositeJ:dateCreated
|
||||
# include_in_compositeK:metaC,listX,compositeL,compositeJ,compositeK,listZ
|
||||
# include_in_compositeL:ships,metaA,listZ,datePublished,dateUpdated,
|
||||
|
||||
extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
extra_subject_tags: metaA,metaB,metaC
|
||||
# extra_titlepage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
# extra_logpage_entries: metaA,metaB,metaC,listX,listY,listZ,compositeJ,compositeK,compositeL
|
||||
# extra_subject_tags: metaA,metaB,metaC
|
||||
|
||||
replace_metadata:
|
||||
compositeL=>Val=>VALUE
|
||||
series,extratags=>Test=>Plan
|
||||
Puella Magi Madoka Magica.* => Madoka
|
||||
Comedy=>Humor
|
||||
Crossover: (.*)=>\1
|
||||
(.*)Great(.*)=>\1Moderate\2
|
||||
.*-Centered=>
|
||||
characters=>Harry P\.=>Harry Potter
|
||||
# replace_metadata:
|
||||
# compositeL=>Val=>VALUE
|
||||
# series,extratags=>Test=>Plan
|
||||
# Puella Magi Madoka Magica.* => Madoka
|
||||
# Comedy=>Humor
|
||||
# Crossover: (.*)=>\1
|
||||
# (.*)Great(.*)=>\1Moderate\2
|
||||
# .*-Centered=>
|
||||
# characters=>Harry P\.=>Harry Potter
|
||||
|
||||
|
||||
## If necessary, you can define [<site>:<format>] sections to
|
||||
|
|
@ -557,9 +582,6 @@ cliches_label:Character Cliches
|
|||
# themes=>#bcolumn,a
|
||||
# timeline=>#ccolumn,n
|
||||
|
||||
## adds to include_subject_tags instead of replacing it.
|
||||
#extra_subject_tags: themes,timeline,cliches
|
||||
|
||||
[erosnsappho.sycophanthex.com]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Harry Potter
|
||||
|
|
@ -1015,6 +1037,15 @@ extracategories:Harry Potter
|
|||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Prison Break
|
||||
|
||||
[www.qaf-fic.com]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Queer as Folk
|
||||
|
||||
## Some sites do not require a login, but do require the user to
|
||||
## confirm they are adult for adult content. In commandline version,
|
||||
## this should go in your personal.ini, not defaults.ini.
|
||||
#is_adult:true
|
||||
|
||||
[www.scarvesandcoffee.net]
|
||||
## Site dedicated to these categories/characters/ships
|
||||
extracategories:Glee
|
||||
|
|
|
|||
Loading…
Reference in a new issue