mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-20 07:45:21 +01:00
Fix adapters that used getMetadata(title), which can be changed by various settings.
This commit is contained in:
parent
5c49248700
commit
c48c5dd35a
3 changed files with 19 additions and 13 deletions
|
|
@ -129,7 +129,8 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
if not title:
|
||||
raise exceptions.StoryDoesNotExist('Cannot find title on the page {}'.format(url))
|
||||
|
||||
self.story.setMetadata('title', stripHTML(soup.find('h2')))
|
||||
rawtitle = stripHTML(soup.find('h2'))
|
||||
self.story.setMetadata('title', rawtitle)
|
||||
|
||||
# This site has the entire story on one page, so we will be using the normalized URL as
|
||||
# the chapterUrl and the Title as the chapter Title
|
||||
|
|
@ -153,7 +154,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
if ',' in mdata:
|
||||
self.story.setMetadata('coauthor', ', '.join(mdata.split(',')[1:]).strip())
|
||||
mdata = mdata.split(',')[0]
|
||||
|
||||
|
||||
# print mdata
|
||||
# self.story.getMetadata('coauthor')
|
||||
# sys.exit()
|
||||
|
|
@ -184,13 +185,16 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
if stories:
|
||||
for story in stories:
|
||||
# There are a lot of nbsp's (non-breaking spaces) in here, so I'm going to remove them
|
||||
# I'm also getting rid of the bold tags and the nextline characters to make it
|
||||
# I'm also getting rid of the bold tags and the nextline characters to make it
|
||||
# easier to get the information below
|
||||
story = repr(story).replace(u'\\xa0', '').replace(' ',' ').replace(
|
||||
'<b>','').replace('</b>','').replace(r'\n','')
|
||||
story = self.make_soup(story).find('p')
|
||||
story_a = story.find('a')
|
||||
title = self.story.getMetadata('title').split('-')[0].strip()
|
||||
# not sure why this split is here, but it caused
|
||||
# problems when title_chapter_range_pattern
|
||||
# introduces a '-', so save rawtitle --JM
|
||||
title = rawtitle.split('-')[0].strip()
|
||||
if story_a.get_text() == title:
|
||||
story_found = True
|
||||
break
|
||||
|
|
|
|||
|
|
@ -129,7 +129,8 @@ class BFAArchiveShriftwebOrgSiteAdapter(BaseSiteAdapter):
|
|||
if not title:
|
||||
raise exceptions.StoryDoesNotExist('Cannot find title on the page {}'.format(url))
|
||||
|
||||
self.story.setMetadata('title', stripHTML(title))
|
||||
rawtitle = stripHTML(title)
|
||||
self.story.setMetadata('title', rawtitle)
|
||||
|
||||
# This site has the entire story on one page, so we will be using the normalized URL as
|
||||
# the chapterUrl and the Title as the chapter Title
|
||||
|
|
@ -182,7 +183,7 @@ class BFAArchiveShriftwebOrgSiteAdapter(BaseSiteAdapter):
|
|||
story = self.make_soup(story).find('div')
|
||||
story_a = story.find('a')
|
||||
## some stories have special characters... need to fix them.
|
||||
title = repr(self.story.getMetadata('title'))[2:-1].replace('&', '&')
|
||||
title = repr(rawtitle)[2:-1].replace('&', '&')
|
||||
if title in story_a.get_text():
|
||||
story_found = True
|
||||
break
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# 1252 is a superset of iso-8859-1. Most sites that claim to be iso-8859-1 (and some that
|
||||
# claim to be utf8) are really windows-1252.
|
||||
self.decode = ["Windows-1252", "utf8", "iso-8859-1"]
|
||||
self.decode = ["Windows-1252", "utf8", "iso-8859-1"]
|
||||
|
||||
# Setting the adult status to false initially
|
||||
self.is_adult=False
|
||||
|
|
@ -122,10 +122,11 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorId', author)
|
||||
self.story.setMetadata('authorUrl', 'http://'+self.getSiteDomain())
|
||||
self.story.setMetadata('author', author)
|
||||
|
||||
|
||||
## Title
|
||||
self.story.setMetadata('title',stripHTML(soup.find('h1')).replace(
|
||||
'by '+self.story.getMetadata('author'), '').strip())
|
||||
rawtitle = stripHTML(soup.find('h1')).replace(
|
||||
'by '+self.story.getMetadata('author'), '').strip()
|
||||
self.story.setMetadata('title',rawtitle)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.find('select').find_all('option', value=re.compile(
|
||||
|
|
@ -148,12 +149,12 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
story_found = False
|
||||
for story in asoup.find('ul', {'id':'fic_list'}).find_all('li'):
|
||||
if self.story.getMetadata('title') == stripHTML(story.a):
|
||||
if rawtitle == stripHTML(story.a):
|
||||
story_found = True
|
||||
break
|
||||
else:
|
||||
story_found = False
|
||||
|
||||
|
||||
if not story_found:
|
||||
raise exceptions.StoryDoesNotExist("Cannot find story '{}' on author's page '{}'".format(
|
||||
url, self.story.getMetadata('authorUrl')))
|
||||
|
|
@ -200,7 +201,7 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):
|
|||
'rd,', ',').replace('th,', ',').replace('.', '').strip()
|
||||
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
|
||||
|
||||
# I'm going to add the disclaimer
|
||||
# I'm going to add the disclaimer
|
||||
disclaimer = soup.find('strong', {'id':'disclaimer'})
|
||||
if disclaimer:
|
||||
self.story.setMetadata('disclaimer', stripHTML(disclaimer).replace(
|
||||
|
|
|
|||
Loading…
Reference in a new issue