From 33d1fabd11915ecc1e64e74e42764bdadee32feb Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Wed, 10 Nov 2010 10:18:46 -0600 Subject: [PATCH] Changeset Tag Branch User Date Summary 53:c93e07566456 wsuetholz 2010-11-10 10:11 Ficwad wants you to log in in order to view some stories.. The old ficwad.py got around that by starting with the first chapter instead of the story index page. Since I needed the story index page I had changed it to switch to that page, and then scrape the chapter information from there, which doesn't work if the chapters are blocked. While it still won't work if you pass in the URL for the story index page, I now switch back to the page that you passed in originally when looking for the chapters to download.. The one problem I have with this is that I change the self.url to the story index page, which should probably remain so that we have a consistent self.url even if the user starts with chapter 9 this time instead of chapter 1. --- ficwad.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ficwad.py b/ficwad.py index 133d424a..4fa0b87d 100644 --- a/ficwad.py +++ b/ficwad.py @@ -71,6 +71,7 @@ class FicWad(FanfictionSiteAdapter): self.storyUserRating = '0' self.storyCharacters = [] self.storySeries = '' + oldurl = '' data = u2.urlopen(self.url).read() soup = bs.BeautifulStoneSoup(data) @@ -84,6 +85,7 @@ class FicWad(FanfictionSiteAdapter): if u1 == "story": # This page does not have the correct information on it.. Need to get the Story Title Page logging.debug('URL %s is a chapter URL. Getting Title Page http://%s/%s/%s.' 
% (self.url, self.host, u1, storyid)) + oldurl = self.url self.url = 'http://' + self.host + '/' + u1 + '/' + storyid data = u2.urlopen(self.url).read() soup = bs.BeautifulStoneSoup(data) @@ -177,6 +179,10 @@ class FicWad(FanfictionSiteAdapter): result = [] ii = 1 + if oldurl is not None and len(oldurl) > 0: + data = u2.urlopen(oldurl).read() + soup = bs.BeautifulStoneSoup(data) + storylist = soup.find('ul', {'id' : 'storylist'}) if storylist is not None: allH4s = storylist.findAll('h4')