From bbdf2fb0030bbec1be56eed53cc69bfc0065f969 Mon Sep 17 00:00:00 2001 From: botmtl Date: Sat, 20 May 2017 20:28:40 -0400 Subject: [PATCH] Removed most of the logging (kept 4, one being the big one to API_STORY_INFO) All str.replace calls now use the same method. --- fanficfare/adapters/adapter_wattpadcom.py | 59 ++++++----------------- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/fanficfare/adapters/adapter_wattpadcom.py b/fanficfare/adapters/adapter_wattpadcom.py index 9a93f4e2..9d347833 100644 --- a/fanficfare/adapters/adapter_wattpadcom.py +++ b/fanficfare/adapters/adapter_wattpadcom.py @@ -22,24 +22,22 @@ import re from .base_adapter import BaseSiteAdapter, makeDate from .. import exceptions as exceptions - logger = logging.getLogger(__name__) - class WattpadComAdapter(BaseSiteAdapter): # All the API discovery work done by github user de3sw2aq1 # Source: https://github.com/de3sw2aq1/wattpad-ebook-scraper/blob/master/scrape.py API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories' - API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/{0}' # SAME URL WITH /stories?id=X is NIT the same - API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext?id={0}' - API_CHAPTERINFO = 'https://www.wattpad.com/apiv2/info?id={0}' + API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/%s' # stories?id=X is NOT the same + API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext?id=%s' + API_CHAPTERINFO = 'https://www.wattpad.com/apiv2/info?id=%s' CATEGORY_DEFs = None def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) - self.storyId = str(self.getStoryId(url)) + self.storyId = unicode(self.getStoryId(url)) self.story.setMetadata('storyId', self.storyId) - self._setURL('https://www.wattpad.com/story/{0}'.format(self.storyId)) + self._setURL('https://www.wattpad.com/story/%s' % self.storyId) # categoryDefs do not change all that often, if at all. 
Could be put in a constant, leaving it as a class var for now # note: classvar may be useless because of del adapter @@ -47,7 +45,8 @@ class WattpadComAdapter(BaseSiteAdapter): try: WattpadComAdapter.CATEGORY_DEFs = json.loads(self._fetchUrl(WattpadComAdapter.API_GETCATEGORIES)) except: - logger.debug('Something went wrong trying to fetch the category definitions (API_GETCATEGORIES)') + logger.debug('API_GETCATEGORIES failed.') + WattpadComAdapter.CATEGORY_DEFs = [] @staticmethod def getSiteDomain(): @@ -79,94 +78,68 @@ class WattpadComAdapter(BaseSiteAdapter): def getStoryId(self, url): storyIdInUrl = re.match('https://www\.wattpad\.com/story/(?P<storyId>\d+).*', url) if storyIdInUrl is not None: - logger.debug('storyId found in given url:{0}'.format(storyIdInUrl)) return storyIdInUrl.group("storyId") else: - chapterIdInUrl = re.match('https://www\.wattpad\.com/(?P<storyId>\d+).*', url) - logger.debug('call API_CHAPTER_INFO for:{0}'.format(chapterIdInUrl)) - chapterInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_CHAPTERINFO.format(chapterIdInUrl.group('storyId')))) + chapterIdInUrl = re.match('https://www\.wattpad\.com/(?P<chapterId>\d+).*', url) + chapterInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_CHAPTERINFO % chapterIdInUrl.group('chapterId'))) groupid = chapterInfo.get('groupId', None) - logger.debug('API_CHAPTER_INFO returned {0} for chapterId {1}'.format(groupid, chapterIdInUrl)) if groupid is None: raise exceptions.StoryDoesNotExist(url) else: return groupid def doExtractChapterUrlsAndMetadata(self, get_cover=True): - # API_STORYINFO - logger.debug('Start of metadata extraction for storyId:' + self.storyId) try: - storyInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_STORYINFO.format(self.storyId))) - logger.debug('API_STORYINFO returned:' + json.dumps(storyInfo, indent=1)) - except Exception, e: + storyInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_STORYINFO % self.storyId)) + logger.debug('storyInfo: %s' % json.dumps(storyInfo)) + except Exception: 
raise exceptions.InvalidStoryURL(self.url, self.getSiteDomain(), self.getSiteExampleURLs()) - if not self.getConfig('is_adult') and storyInfo['mature'] == True: - logger.debug('config is_adult is false but storyInfo[mature] is true') + if not (self.is_adult or self.getConfig("is_adult")) and storyInfo['mature'] == True: raise exceptions.AdultCheckRequired(self.url) # title - logger.debug('Found Title' + storyInfo['title']) self.story.setMetadata('title', storyInfo['title']) # author - logger.debug('authorId (and author name)' + storyInfo['user']['name']) self.story.setMetadata('authorId', storyInfo['user']['name']) self.story.setMetadata('author', storyInfo['user']['name']) - logger.debug('authorURL: ' + 'https://www.wattpad.com/user/{0}'.format(storyInfo['user']['name'])) - self.story.setMetadata('authorUrl', 'https://www.wattpad.com/user/{0}'.format(storyInfo['user']['name'])) - logger.debug('Story read count: {0}'.format(storyInfo['readCount'])) + self.story.setMetadata('authorUrl', 'https://www.wattpad.com/user/%s' % storyInfo['user']['name']) self.story.setMetadata('reads', storyInfo['readCount']) # STATUS - logger.debug('status: ' + str(storyInfo['completed'])) self.story.setMetadata('status', 'In-Progress') if storyInfo['completed']: self.story.setMetadata('status', 'Completed') # DESCRIPTION - logger.debug('description: ' + storyInfo['description']) self.setDescription(storyInfo['url'], storyInfo['description']) # DATES - logger.debug('dateUpdated: ' + storyInfo['modifyDate']) self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S")) - logger.debug('datePublished: ' + storyInfo['createDate']) self.story.setMetadata('datePublished', makeDate(storyInfo['createDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S")) self.chapterUrls = [(part['title'], part['url']) for part in storyInfo['parts']] - logger.debug('chapterUrls:' + str(storyInfo['parts'])) self.story.setMetadata('numChapters', len(self.chapterUrls)) - - 
logger.debug('Cover: {0}'.format(storyInfo['cover'])) self.setCoverImage(storyInfo['url'], storyInfo['cover']) - - logger.debug('Language: ', storyInfo['language']['name']) self.story.setMetadata('language', storyInfo['language']['name']) # CATEGORIES - # there should be only one category per book, but the data structure allows for more try: - logger.debug('Category Keys: ' + str(storyInfo['categories'])) storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(str(c)) for c in storyInfo['categories'] if WattpadComAdapter.CATEGORY_DEFs.has_key(str(c))] - logger.debug('Categories from Category Keys: {0}.'.format(str(storyCategories))) - logger.debug('Tags: {0}.'.format(str(storyInfo['tags']))) tags = storyCategories + storyInfo['tags'] - logger.debug('Tags + Categories = ', str(tags)) self.story.setMetadata('tags', tags) except: - logger.debug('Conversion from category keys to tags failed.') pass return self.extractChapterUrlsAndMetadata() def getChapterText(self, url): - logger.debug('Getting chapter text from: %s' % url) + logger.debug('%s' % url) chapterID = re.search(u'https://www.wattpad.com/(?P<chapterID>\d+).*', url).group('chapterID') - return self._fetchUrl(WattpadComAdapter.API_STORYTEXT.format(chapterID)) - + return self._fetchUrl(WattpadComAdapter.API_STORYTEXT % chapterID) # adapter self-dicovery is not implemented in fanficfare (it existed for the previous project) def getClass():