Mirror of https://github.com/JimmXinu/FanFicFare.git
Synced 2026-01-04 07:03:44 +01:00
Change logging styles & change adult check for jlaunlimited
This commit is contained in:
parent 8863e1be63
commit ce4b03707d

2 changed files with 19 additions and 57 deletions
File 1 of 2 (the InDeathNet adapter):

@@ -16,7 +16,8 @@
 #
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
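The hunk above replaces direct calls on the logging module with a per-module logger. A minimal sketch of the pattern outside the diff (the function name and message are illustrative, not from the commit):

import logging

# One logger per module, named after the module itself.  Log records
# then carry the adapter's module name, and verbosity can be tuned
# per module through the standard logging hierarchy.
logger = logging.getLogger(__name__)

def fetch_story(url):
    # hypothetical call site; the adapters log story/chapter URLs this way
    logger.debug("fetching %s", url)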
@@ -50,7 +51,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
         m = re.match(self.getSiteURLPattern(),url)
         if m:
             self.story.setMetadata('storyId',m.group('id'))
-            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
             # normalized story URL.
             self._setURL('http://www.' + self.getSiteDomain() + '/blog/archive/'+self.story.getMetadata('storyId')+'-'+m.group('name')+'/')
         else:
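The _setURL call above canonicalizes whatever URL form the user pasted into a single http://www.indeath.net/blog/archive/<id>-<name>/ shape, so every variant maps to one story. A standalone sketch with an illustrative id/name; the pattern here is merely inferred from the hunk (the real one comes from getSiteURLPattern()):

import re

# Inferred, simplified pattern for illustration only.
pattern = r"http://(www\.)?indeath\.net/blog/archive/(?P<id>\d+)-(?P<name>[a-z0-9]+)/?"
m = re.match(pattern, "http://indeath.net/blog/archive/1234-somestory")
if m:
    normalized = ("http://www.indeath.net/blog/archive/"
                  + m.group('id') + '-' + m.group('name') + '/')
    print normalized  # http://www.indeath.net/blog/archive/1234-somestory/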
@@ -80,8 +81,8 @@ class InDeathNetAdapter(BaseSiteAdapter):
 
 
     def getDateFromComponents(self, postmonth, postday):
-        ym = re.search(re.compile(r"Entries\ in\ (?P<mon>January|February|March|April|May|June|July|August|September|October|November|December)\ (?P<year>\d{4})"),postmonth)
-        d = re.search(re.compile(r"(?P<day>\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"),postday)
+        ym = re.search("Entries\ in\ (?P<mon>January|February|March|April|May|June|July|August|September|October|November|December)\ (?P<year>\d{4})",postmonth)
+        d = re.search("(?P<day>\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",postday)
         postdate = makeDate(d.group('day')+' '+ym.group('mon')+' '+ym.group('year'),self.dateformat)
         return postdate
 
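This hunk drops the explicit re.compile() wrapper: re.search() accepts a pattern string directly, and the re module caches compiled patterns internally, so behavior is unchanged. (Dropping the r prefix also still works, since Python passes unrecognized escapes like \  and \d through plain strings unchanged, though raw strings would be more robust.) A small illustration with simplified patterns and made-up sample strings:

import re

postmonth = "Entries in March 2012"  # hypothetical month heading
postday = "14 Mar"                   # hypothetical day label

# re.search compiles (and caches) the pattern itself, so passing the
# string is equivalent to the re.compile(...) form it replaces.
ym = re.search(r"Entries in (?P<mon>\w+) (?P<year>\d{4})", postmonth)
d = re.search(r"(?P<day>\d{2}) [A-Z][a-z]{2}", postday)
print d.group('day'), ym.group('mon'), ym.group('year')  # 14 March 2012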
@@ -115,7 +116,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
 
         # Find authorid and URL from first link in Recent Entries (don't yet reference 'recent entries' - let's see if that is required)
         a = soup.find('a', href=re.compile(r"http://www.indeath.net/user/\d+\-[a-z0-9]+/$")) #http://www.indeath.net/user/9083-cyrex/
-        m = re.search(re.compile(r'http://www.indeath.net/user/(?P<id>\d+)\-(?P<name>[a-z0-9]*)/$'),a['href'])
+        m = re.search('http://www.indeath.net/user/(?P<id>\d+)\-(?P<name>[a-z0-9]*)/$',a['href'])
         self.story.setMetadata('authorId',m.group('id'))
         self.story.setMetadata('authorUrl',a['href'])
         self.story.setMetadata('author',m.group('name'))
@@ -143,7 +144,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
 
         # Process List of Chapters
         self.story.setMetadata('numChapters',len(chapters))
-        logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
+        logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters'))
         for x in range(0,len(chapters)):
             # just in case there's tags, like <i> in chapter titles.
             chapter=chapters[x]
@@ -151,7 +152,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
                 self.chapterUrls.append((self.story.getMetadata('title'),chapter['href']))
             else:
                 ct = stripHTML(chapter)
-                tnew = re.match(re.compile(r"(?i)"+self.story.getMetadata('title')+r" - (?P<newtitle>.*)$"),ct)
+                tnew = re.match("(?i)"+self.story.getMetadata('title')+r" - (?P<newtitle>.*)$",ct)
                 if tnew:
                     chaptertitle = tnew.group('newtitle')
                 else:
@@ -163,7 +164,7 @@ class InDeathNetAdapter(BaseSiteAdapter):
 
     # grab the text for an individual chapter.
     def getChapterText(self, url):
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         #chapter=bs.BeautifulSoup('<div class="story"></div>')
         data = self._fetchUrl(url)
File 2 of 2 (the JLAUnlimited adapter):

@@ -16,7 +16,8 @@
 #
 
 import time
 import logging
+logger = logging.getLogger(__name__)
 import re
 import urllib2
 
@@ -47,7 +48,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
 
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+        logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
 
         self._setURL('http://' + self.getSiteDomain() + '/eFiction1.1/viewstory.php?sid='+self.story.getMetadata('storyId'))
 
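Because URL validation guarantees the query string is exactly sid=NNNN, the bare split on '=' is safe. A standalone Python 2 illustration (the domain is assumed from the adapter name; sid=1234 echoes the comment in the hunk):

from urlparse import urlparse

parsed = urlparse("http://www.jlaunlimited.com/eFiction1.1/viewstory.php?sid=1234")
story_id = parsed.query.split('=',)[1]
print story_id  # 1234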
@@ -69,41 +70,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
     def getSiteURLPattern(self):
         return re.escape("http://"+self.getSiteDomain()+"/eFiction1.1/viewstory.php?sid=")+r"\d+$"
 
-    # ## Login seems to be reasonably standard across eFiction sites. This story is in The Bedchamber
-    # def needToLoginCheck(self, data):
-    #     if 'This story is in The Bedchamber' in data \
-    #         or 'That username is not in our database' in data \
-    #         or "That password is not correct, please try again" in data:
-    #         return True
-    #     else:
-    #         return False
-    #
-    # def performLogin(self, url):
-    #     params = {}
-    #
-    #     if self.password:
-    #         params['name'] = self.username
-    #         params['pass'] = self.password
-    #     else:
-    #         params['name'] = self.getConfig("username")
-    #         params['pass'] = self.getConfig("password")
-    #     params['login'] = 'yes'
-    #     params['submit'] = 'login'
-    #
-    #     loginUrl = 'http://' + self.getSiteDomain()+'/login.php'
-    #     d = self._fetchUrl(loginUrl,params)
-    #     e = self._fetchUrl(url)
-    #
-    #     if "Welcome back," not in d : #Member Account
-    #         logging.info("Failed to login to URL %s as %s" % (loginUrl, params['name']))
-    #         raise exceptions.FailedToLogin(url,params['name'])
-    #         return False
-    #     elif "This story is in The Bedchamber" in e:
-    #         raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Your account does not have sufficient priviliges to read this story.")
-    #         return False
-    #     else:
-    #         return True
 
 
     ## Getting the chapter list and the meta data, plus 'is adult' checking.
 
@@ -114,14 +81,15 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
             # If the title search below fails, there's a good chance
             # you need a different number. print data at that point
             # and see what the 'click here to continue' url says.
-            addurl = "&ageconsent=ok&warning=4" # XXX
+            addurl = "&ageconsent=ok&warning=5" # XXX
         else:
             addurl=""
+        print addurl
 
         # index=1 makes sure we see the story chapter index. Some
         # sites skip that for one-chapter stories.
         url = self.url+'&index=1'+addurl
-        logging.debug("URL: "+url)
+        logger.debug("URL: "+url)
 
         try:
             data = self._fetchUrl(url)
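As the hunk's own comments explain, eFiction's adult gate is bypassed by replaying the 'click here to continue' link, which carries ageconsent=ok plus a site-specific warning level; this commit bumps jlaunlimited's level from 4 to 5. A sketch of how the fetch URL ends up assembled (the base URL is illustrative, and in the real adapter addurl is only non-empty once the user has confirmed the is_adult setting):

story_url = "http://www.jlaunlimited.com/eFiction1.1/viewstory.php?sid=1234"

# Site-specific consent parameters, discovered per site by inspecting
# the continue link (hence the trial-and-error warning level).
addurl = "&ageconsent=ok&warning=5"

# index=1 forces the chapter index page even for one-chapter stories.
url = story_url + "&index=1" + addurl
print url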
@@ -131,20 +99,13 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
         else:
             raise e
 
-        # if self.needToLoginCheck(data):
-        #     # need to log in for this one.
-        #     self.performLogin(url)
-        #     data = self._fetchUrl(url)
 
         # The actual text that is used to announce you need to be an
         # adult varies from site to site. Again, print data before
         # the title search to troubleshoot.
-        if "I am 18 or older" in data: # XXX
+        if "I am 18 or older" in data or "Not suitable for readers under 17 years of age" in data:
             raise exceptions.AdultCheckRequired(self.url)
 
-        if "Not suitable for readers under 17 years of age" in data:
-            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Not suitable for readers under 17 years of age")
-
         # use BeautifulSoup HTML parser to make everything easier to find.
         soup = bs.BeautifulSoup(data)
         # print data
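The behavioral change in the hunk above: the "Not suitable for readers under 17 years of age" banner used to abort the download outright via FailedToDownload; now it raises the same recoverable AdultCheckRequired as the "I am 18 or older" banner, so the user can confirm their age and retry. A minimal sketch of the new logic (banner strings are the ones in the diff; the local exception class stands in for the adapter's exceptions module):

class AdultCheckRequired(Exception):
    # stand-in for exceptions.AdultCheckRequired used in the diff
    pass

def check_adult_gate(data, url):
    # Both banners now route to the same recoverable exception,
    # instead of the under-17 banner hard-failing the download.
    if ("I am 18 or older" in data
            or "Not suitable for readers under 17 years of age" in data):
        raise AdultCheckRequired(url)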
@@ -270,7 +231,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter):
     # grab the text for an individual chapter.
     def getChapterText(self, url):
 
-        logging.debug('Getting chapter text from: %s' % url)
+        logger.debug('Getting chapter text from: %s' % url)
 
         soup = bs.BeautifulSoup(self._fetchUrl(url),
                                 selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.