Handle the case where the first chapter's URL is passed in as the story URL.

2025-12-06 08:52:55 +01:00 · 2010-11-11 13:53:48 -06:00 · 2010-11-11 13:53:48 -06:00 · 981a922d00
commit 981a922d00
parent b51dd549f5
1 changed files with 35 additions and 5 deletions
--- a/fictionalley.py
+++ b/fictionalley.py
@@ -74,6 +74,7 @@ class FictionAlley(FanfictionSiteAdapter):
 		self.storyUserRating = '0'
 		self.storyCharacters = []
 		self.storySeries = ''
+		self.storyName = ''
 		self.outputName = ''
 		self.outputStorySep = '-fa_'	
 		
@@ -129,18 +130,47 @@ class FictionAlley(FanfictionSiteAdapter):
 		data = data.replace('<!-- headerstart -->','<crazytagstringnobodywouldstumbleonaccidently id="storyheaders">').replace('<!-- headerend -->','</crazytagstringnobodywouldstumbleonaccidently>')
 		soup = bs.BeautifulStoneSoup(data)
 				
-		# Get title from <title>, remove before '-'.
-		title = soup.find('title').string
-		self.storyName = "-".join(title.split('-')[1:]).strip().replace(" (Story Text)","")
-		
-		links = soup.findAll('li')
+		breadcrumbs = soup.find('div', {'class': 'breadcrumbs'})
+		if breadcrumbs is not None:
+			# Be aware that this means that the user has entered the {STORY}01.html
+			# URL.  We will not have valid Published and Updated dates.  User should
+			# enter the {STORY}.html instead.  We should force that instead of this.
+			logging.debug('breadcrumbs=%s' % breadcrumbs )
+			bcas = breadcrumbs.findAll('a')
+			logging.debug('bcas=%s' % bcas )
+			if bcas is not None and len(bcas) > 1:
+				bca = bcas[1]
+				logging.debug('bca=%s' % bca )
+				if 'href' in bca._getAttrMap():
+					logging.debug('bca.href=%s' % bca['href'] )
+					url = str(bca['href'])
+					if url is not None and len(url) > 0:
+						self.url = url
+						logging.debug('self.url=%s' % self.url )
+						ss = self.url.split('/')
+						self.storyId = ss[-1].replace('.html','')
+						self.storyName = bca.string
+						logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName))

+						data = self.opener.open(self.url).read()		
+						
+						# There is some useful information in the headers of the first chapter page.
+						data = data.replace('<!-- headerstart -->','<crazytagstringnobodywouldstumbleonaccidently id="storyheaders">').replace('<!-- headerend -->','</crazytagstringnobodywouldstumbleonaccidently>')
+						soup = bs.BeautifulStoneSoup(data)
+		
 		# If it is decided that we really do care about number of words..  It's only available on the author's page..
 		#d0 = self.opener.open(self.authorURL).read()
 		#soupA = bs.BeautifulStoneSoup(d0)
 		#dls = soupA.findAll('dl')
 		#logging.debug('dls=%s' % dls)
 		
+		# Get title from <title>, remove before '-'.
+		if len(self.storyName) == 0:
+			title = soup.find('title').string
+			self.storyName = "-".join(title.split('-')[1:]).strip().replace(" (Story Text)","")
+		
+		links = soup.findAll('li')
+
 		self.numChapters = 0;
 		result = []
 		if len(links) == 0: