From ce4b03707dd075ff3caf2b984b8e685fccaa2f0c Mon Sep 17 00:00:00 2001 From: Besnef Date: Wed, 24 Oct 2012 20:32:05 -0400 Subject: [PATCH] Change logging styles & change adult check for jlaunlimited --- .../adapters/adapter_indeathnet.py | 17 +++--- .../adapters/adapter_jlaunlimitedcom.py | 59 ++++--------------- 2 files changed, 19 insertions(+), 57 deletions(-) diff --git a/fanficdownloader/adapters/adapter_indeathnet.py b/fanficdownloader/adapters/adapter_indeathnet.py index 03ea3477..b1281499 100644 --- a/fanficdownloader/adapters/adapter_indeathnet.py +++ b/fanficdownloader/adapters/adapter_indeathnet.py @@ -16,7 +16,8 @@ # import time -import logging +import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -50,7 +51,7 @@ class InDeathNetAdapter(BaseSiteAdapter): m = re.match(self.getSiteURLPattern(),url) if m: self.story.setMetadata('storyId',m.group('id')) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. self._setURL('http://www.' 
+ self.getSiteDomain() + '/blog/archive/'+self.story.getMetadata('storyId')+'-'+m.group('name')+'/') else: @@ -80,8 +81,8 @@ class InDeathNetAdapter(BaseSiteAdapter): def getDateFromComponents(self, postmonth, postday): - ym = re.search(re.compile(r"Entries\ in\ (?P<mon>January|February|March|April|May|June|July|August|September|October|November|December)\ (?P<year>\d{4})"),postmonth) - d = re.search(re.compile(r"(?P<day>\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"),postday) + ym = re.search("Entries\ in\ (?P<mon>January|February|March|April|May|June|July|August|September|October|November|December)\ (?P<year>\d{4})",postmonth) + d = re.search("(?P<day>\d{2})\ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",postday) postdate = makeDate(d.group('day')+' '+ym.group('mon')+' '+ym.group('year'),self.dateformat) return postdate @@ -115,7 +116,7 @@ class InDeathNetAdapter(BaseSiteAdapter): # Find authorid and URL from first link in Recent Entries (don't yet reference 'recent entries' - let's see if that is required) a = soup.find('a', href=re.compile(r"http://www.indeath.net/user/\d+\-[a-z0-9]+/$")) #http://www.indeath.net/user/9083-cyrex/ - m = re.search(re.compile(r'http://www.indeath.net/user/(?P<id>\d+)\-(?P<name>[a-z0-9]*)/$'),a['href']) + m = re.search('http://www.indeath.net/user/(?P<id>\d+)\-(?P<name>[a-z0-9]*)/$',a['href']) self.story.setMetadata('authorId',m.group('id')) self.story.setMetadata('authorUrl',a['href']) self.story.setMetadata('author',m.group('name')) @@ -143,7 +144,7 @@ class InDeathNetAdapter(BaseSiteAdapter): # Process List of Chapters self.story.setMetadata('numChapters',len(chapters)) - logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + logger.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) for x in range(0,len(chapters)): # just in case there's tags, like in chapter titles.
chapter=chapters[x] @@ -151,7 +152,7 @@ class InDeathNetAdapter(BaseSiteAdapter): self.chapterUrls.append((self.story.getMetadata('title'),chapter['href'])) else: ct = stripHTML(chapter) - tnew = re.match(re.compile(r"(?i)"+self.story.getMetadata('title')+r" - (?P<newtitle>.*)$"),ct) + tnew = re.match("(?i)"+self.story.getMetadata('title')+r" - (?P<newtitle>.*)$",ct) if tnew: chaptertitle = tnew.group('newtitle') else: @@ -163,7 +164,7 @@ class InDeathNetAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) #chapter=bs.BeautifulSoup('
') data = self._fetchUrl(url) diff --git a/fanficdownloader/adapters/adapter_jlaunlimitedcom.py b/fanficdownloader/adapters/adapter_jlaunlimitedcom.py index 7b369059..3ed9922a 100644 --- a/fanficdownloader/adapters/adapter_jlaunlimitedcom.py +++ b/fanficdownloader/adapters/adapter_jlaunlimitedcom.py @@ -16,7 +16,8 @@ # import time -import logging +import logging +logger = logging.getLogger(__name__) import re import urllib2 @@ -47,7 +48,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only sid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + logger.debug("storyId: (%s)"%self.story.getMetadata('storyId')) self._setURL('http://' + self.getSiteDomain() + '/eFiction1.1/viewstory.php?sid='+self.story.getMetadata('storyId')) @@ -69,41 +70,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter): def getSiteURLPattern(self): return re.escape("http://"+self.getSiteDomain()+"/eFiction1.1/viewstory.php?sid=")+r"\d+$" -# ## Login seems to be reasonably standard across eFiction sites. 
This story is in The Bedchamber -# def needToLoginCheck(self, data): -# if 'This story is in The Bedchamber' in data \ -# or 'That username is not in our database' in data \ -# or "That password is not correct, please try again" in data: -# return True -# else: -# return False -# -# def performLogin(self, url): -# params = {} -# -# if self.password: -# params['name'] = self.username -# params['pass'] = self.password -# else: -# params['name'] = self.getConfig("username") -# params['pass'] = self.getConfig("password") -# params['login'] = 'yes' -# params['submit'] = 'login' -# -# loginUrl = 'http://' + self.getSiteDomain()+'/login.php' -# d = self._fetchUrl(loginUrl,params) -# e = self._fetchUrl(url) -# -# if "Welcome back," not in d : #Member Account -# logging.info("Failed to login to URL %s as %s" % (loginUrl, -# params['name'])) -# raise exceptions.FailedToLogin(url,params['name']) -# return False -# elif "This story is in The Bedchamber" in e: -# raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Your account does not have sufficient priviliges to read this story.") -# return False -# else: -# return True + ## Getting the chapter list and the meta data, plus 'is adult' checking. @@ -114,14 +81,15 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter): # If the title search below fails, there's a good chance # you need a different number. print data at that point # and see what the 'click here to continue' url says. - addurl = "&ageconsent=ok&warning=4" # XXX + addurl = "&ageconsent=ok&warning=5" # XXX else: - addurl="" + addurl="" + print addurl # index=1 makes sure we see the story chapter index. Some # sites skip that for one-chapter stories. url = self.url+'&index=1'+addurl - logging.debug("URL: "+url) + logger.debug("URL: "+url) try: data = self._fetchUrl(url) @@ -131,20 +99,13 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter): else: raise e -# if self.needToLoginCheck(data): -# # need to log in for this one. 
-# self.performLogin(url) -# data = self._fetchUrl(url) # The actual text that is used to announce you need to be an # adult varies from site to site. Again, print data before # the title search to troubleshoot. - if "I am 18 or older" in data: # XXX + if "I am 18 or older" in data or "Not suitable for readers under 17 years of age" in data: raise exceptions.AdultCheckRequired(self.url) - if "Not suitable for readers under 17 years of age" in data: - raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Not suitable for readers under 17 years of age") - # use BeautifulSoup HTML parser to make everything easier to find. soup = bs.BeautifulSoup(data) # print data @@ -270,7 +231,7 @@ class JLAUnlimitedComAdapter(BaseSiteAdapter): # grab the text for an individual chapter. def getChapterText(self, url): - logging.debug('Getting chapter text from: %s' % url) + logger.debug('Getting chapter text from: %s' % url) soup = bs.BeautifulSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.