Change logging styles & change adult check for jlaunlimited

This commit is contained in:
Besnef 2012-10-24 20:32:05 -04:00
parent 8863e1be63
commit ce4b03707d
2 changed files with 19 additions and 57 deletions

View file

@ -16,7 +16,8 @@
#
import time
import logging
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -50,7 +51,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
m = re.match(self.getSiteURLPattern(),url)
if m:
self.story.setMetadata('storyId',m.group('id'))
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
# normalized story URL.
self._setURL('http://www.' + self.getSiteDomain() + '/blog/archive/'+self.story.getMetadata('storyId')+'-'+m.group('name')+'/')
else:
@ -80,8 +81,8 @@ class InDeathNetAdapter(BaseSiteAdapter):
def getDateFromComponents(self, postmonth, postday):
# Build a post date from two pieces of page text.
#   postmonth: text containing "Entries in <full month name> <4-digit year>";
#              supplies the month name and the year.
#   postday:   text containing "<2-digit day> <3-letter month abbreviation>";
#              only the day is taken from it.
# Returns the result of makeDate("<day> <month name> <year>", self.dateformat).
#
# NOTE(review): ym and d are each assigned twice below with equivalent
# patterns -- first via re.compile() on raw strings, then by passing a plain
# string straight to re.search(). This looks like the before/after lines of
# the change shown together; confirm which pair the file actually keeps.
ym = re.search(re.compile(r"Entries\ in\ (?P<mon>January|February|March|April|May|June|July|August|September|October|November|December)\ (?P<year>\d{4})"),postmonth)
d = re.search(re.compile(r"(?P<day>\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"),postday)
# The two patterns below are not raw strings; they work only because "\ " and
# "\d" are not recognised string escapes, so the backslashes reach the regex
# engine unchanged.
ym = re.search("Entries\ in\ (?P<mon>January|February|March|April|May|June|July|August|September|October|November|December)\ (?P<year>\d{4})",postmonth)
d = re.search("(?P<day>\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",postday)
# No None check: if either search finds nothing, .group() raises AttributeError.
postdate = makeDate(d.group('day')+' '+ym.group('mon')+' '+ym.group('year'),self.dateformat)
return postdate
@ -115,7 +116,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
# Find the author id and author URL from the first user-profile link on the page (the 'Recent Entries' section is not referenced explicitly yet - let's see whether that is required)
a = soup.find('a', href=re.compile(r"http://www.indeath.net/user/\d+\-[a-z0-9]+/$")) #http://www.indeath.net/user/9083-cyrex/
m = re.search(re.compile(r'http://www.indeath.net/user/(?P<id>\d+)\-(?P<name>[a-z0-9]*)/$'),a['href'])
m = re.search('http://www.indeath.net/user/(?P<id>\d+)\-(?P<name>[a-z0-9]*)/$',a['href'])
self.story.setMetadata('authorId',m.group('id'))
self.story.setMetadata('authorUrl',a['href'])
self.story.setMetadata('author',m.group('name'))
@ -143,7 +144,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
# Process List of Chapters
self.story.setMetadata('numChapters',len(chapters))
logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
for x in range(0,len(chapters)):
# just in case there are tags, like <i>, in chapter titles.
chapter=chapters[x]
@ -151,7 +152,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
self.chapterUrls.append((self.story.getMetadata('title'),chapter['href']))
else:
ct = stripHTML(chapter)
tnew = re.match(re.compile(r"(?i)"+self.story.getMetadata('title')+r" - (?P<newtitle>.*)$"),ct)
tnew = re.match("(?i)"+self.story.getMetadata('title')+r" - (?P<newtitle>.*)$",ct)
if tnew:
chaptertitle = tnew.group('newtitle')
else:
@ -163,7 +164,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
#chapter=bs.BeautifulSoup('<div class="story"></div>')
data = self._fetchUrl(url)

View file

@ -16,7 +16,8 @@
#
import time
import logging
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
@ -47,7 +48,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
self._setURL('http://' + self.getSiteDomain() + '/eFiction1.1/viewstory.php?sid='+self.story.getMetadata('storyId'))
@ -69,41 +70,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
# Return the regex source string that a story URL must match: the literal
# prefix "http://<site domain>/eFiction1.1/viewstory.php?sid=" (passed
# through re.escape so characters such as '.' and '?' match literally),
# followed by one or more digits running to the end of the string.
return re.escape("http://"+self.getSiteDomain()+"/eFiction1.1/viewstory.php?sid=")+r"\d+$"
# ## Login seems to be reasonably standard across eFiction sites. This story is in The Bedchamber
# def needToLoginCheck(self, data):
# if 'This story is in The Bedchamber' in data \
# or 'That username is not in our database' in data \
# or "That password is not correct, please try again" in data:
# return True
# else:
# return False
#
# def performLogin(self, url):
# params = {}
#
# if self.password:
# params['name'] = self.username
# params['pass'] = self.password
# else:
# params['name'] = self.getConfig("username")
# params['pass'] = self.getConfig("password")
# params['login'] = 'yes'
# params['submit'] = 'login'
#
# loginUrl = 'http://' + self.getSiteDomain()+'/login.php'
# d = self._fetchUrl(loginUrl,params)
# e = self._fetchUrl(url)
#
# if "Welcome back," not in d : #Member Account
# logging.info("Failed to login to URL %s as %s" % (loginUrl,
# params['name']))
# raise exceptions.FailedToLogin(url,params['name'])
# return False
# elif "This story is in The Bedchamber" in e:
# raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Your account does not have sufficient priviliges to read this story.")
# return False
# else:
# return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
@ -114,14 +81,15 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
# If the title search below fails, there's a good chance
# you need a different number. print data at that point
# and see what the 'click here to continue' url says.
addurl = "&ageconsent=ok&warning=4" # XXX
addurl = "&ageconsent=ok&warning=5" # XXX
else:
addurl=""
addurl=""
print addurl
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url+'&index=1'+addurl
logging.debug("URL: "+url)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
@ -131,20 +99,13 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
else:
raise e
# if self.needToLoginCheck(data):
# # need to log in for this one.
# self.performLogin(url)
# data = self._fetchUrl(url)
# The actual text that is used to announce you need to be an
# adult varies from site to site. Again, print data before
# the title search to troubleshoot.
if "I am 18 or older" in data: # XXX
if "I am 18 or older" in data or "Not suitable for readers under 17 years of age" in data:
raise exceptions.AdultCheckRequired(self.url)
if "Not suitable for readers under 17 years of age" in data:
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Not suitable for readers under 17 years of age")
# use BeautifulSoup HTML parser to make everything easier to find.
soup = bs.BeautifulSoup(data)
# print data
@ -270,7 +231,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
logger.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.