From e8dba4e56580be7114cb5108facbc21277b75a5f Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 18 Jan 2017 12:32:07 -0600 Subject: [PATCH] adapter_bloodshedversecom needs to be able to change storyId. --- fanficfare/adapters/adapter_bloodshedversecom.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fanficfare/adapters/adapter_bloodshedversecom.py b/fanficfare/adapters/adapter_bloodshedversecom.py index 57749d55..9f14a15c 100644 --- a/fanficfare/adapters/adapter_bloodshedversecom.py +++ b/fanficfare/adapters/adapter_bloodshedversecom.py @@ -3,6 +3,9 @@ import re import urllib2 import urlparse +import logging +logger = logging.getLogger(__name__) + from bs4 import BeautifulSoup from ..htmlcleanup import stripHTML @@ -70,6 +73,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter): return url def extractChapterUrlsAndMetadata(self): + logger.debug("URL: "+self.url) soup = self._customized_fetch_url(self.url) # Since no 404 error code we have to raise the exception ourselves. @@ -83,12 +87,22 @@ class BloodshedverseComAdapter(BaseSiteAdapter): url = self.READ_URL_TEMPLATE % option['value'] self.chapterUrls.append((title, url)) + # Reset the storyId to be the first chapter no. Needed + # because emails contain link to later chapters instead. + query_data = urlparse.parse_qs(self.chapterUrls[0][1]) + story_no = query_data['no'][0] + + self.story.setMetadata('storyId', story_no) + self._setURL(self.READ_URL_TEMPLATE % story_no) + logger.info("updated storyId:%s"%story_no) + logger.info("updated storyUrl:%s"%self.url) + + story_no = self.story.getMetadata('storyId') # Get the URL to the author's page and find the correct story entry to # scrape the metadata author_url = urlparse.urljoin(self.url, soup.find('a', {'class': 'headline'})['href']) soup = self._customized_fetch_url(author_url) - story_no = self.story.getMetadata('storyId') # Ignore first list_box div, it only contains the author information for list_box in soup('div', {'class': 'list_box'})[1:]: url = list_box.find('a', {'class': 'fictitle'})['href']