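Fix adapters that matched stories using getMetadata('title'), whose value can be changed by various settings (e.g. title_chapter_range_pattern); keep the raw scraped title for matching instead.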

2025-12-20 07:45:21 +01:00 · 2018-08-05 18:21:09 -05:00 · 2018-08-05 18:21:09 -05:00 · c48c5dd35a
commit c48c5dd35a
parent 5c49248700
3 changed files with 19 additions and 13 deletions
--- a/fanficfare/adapters/adapter_fireflypopulliorg.py
+++ b/fanficfare/adapters/adapter_fireflypopulliorg.py
@ -129,7 +129,8 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
        if not title:
            raise exceptions.StoryDoesNotExist('Cannot find title on the page {}'.format(url))

-        self.story.setMetadata('title', stripHTML(soup.find('h2')))
+        rawtitle = stripHTML(soup.find('h2'))
+        self.story.setMetadata('title', rawtitle)

        # This site has the entire story on one page, so we will be using the normalized URL as
        # the chapterUrl and the Title as the chapter Title
@ -153,7 +154,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
        if ',' in mdata:
            self.story.setMetadata('coauthor', ', '.join(mdata.split(',')[1:]).strip())
            mdata = mdata.split(',')[0]
-        
+
 #        print mdata
 #        self.story.getMetadata('coauthor')
 #        sys.exit()
@ -184,13 +185,16 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
                if stories:
                    for story in stories:
                        # There alot of nbsp's (non broken spaces) in here, so I'm going to remove them
-                        # I'm also getting rid of the bold tags and the nextline characters to make it 
+                        # I'm also getting rid of the bold tags and the nextline characters to make it
                        # easier to get the information below
                        story = repr(story).replace(u'\\xa0', '').replace('  ',' ').replace(
                            '<b>','').replace('</b>','').replace(r'\n','')
                        story = self.make_soup(story).find('p')
                        story_a = story.find('a')
-                        title = self.story.getMetadata('title').split('-')[0].strip()
+                        # not sure why this split is here, but it caused
+                        # problems when title_chapter_range_pattern
+                        # introduces a '-', so save rawtitle --JM
+                        title = rawtitle.split('-')[0].strip()
                        if story_a.get_text() == title:
                            story_found = True
                            break
--- a/fanficfare/adapters/adapter_shriftweborgbfa.py
+++ b/fanficfare/adapters/adapter_shriftweborgbfa.py
@ -129,7 +129,8 @@ class BFAArchiveShriftwebOrgSiteAdapter(BaseSiteAdapter):
        if not title:
            raise exceptions.StoryDoesNotExist('Cannot find title on the page {}'.format(url))

-        self.story.setMetadata('title', stripHTML(title))
+        rawtitle = stripHTML(title)
+        self.story.setMetadata('title', rawtitle)

        # This site has the entire story on one page, so we will be using the normalized URL as
        # the chapterUrl and the Title as the chapter Title
@ -182,7 +183,7 @@ class BFAArchiveShriftwebOrgSiteAdapter(BaseSiteAdapter):
                        story = self.make_soup(story).find('div')
                        story_a = story.find('a')
                        ## some stories have special characters... need to fix them.
-                        title = repr(self.story.getMetadata('title'))[2:-1].replace('&amp;', '&')
+                        title = repr(rawtitle)[2:-1].replace('&amp;', '&')
                        if title in story_a.get_text():
                            story_found = True
                            break
--- a/fanficfare/adapters/adapter_unknowableroomorg.py
+++ b/fanficfare/adapters/adapter_unknowableroomorg.py
@ -48,7 +48,7 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be  iso-8859-1 (and some that
        # claim to be  utf8) are really windows-1252.
-        self.decode = ["Windows-1252", "utf8", "iso-8859-1"] 
+        self.decode = ["Windows-1252", "utf8", "iso-8859-1"]

        # Setting the adult status to false initially
        self.is_adult=False
@ -122,10 +122,11 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):
            self.story.setMetadata('authorId', author)
            self.story.setMetadata('authorUrl', 'http://'+self.getSiteDomain())
            self.story.setMetadata('author', author)
-            
+
        ## Title
-        self.story.setMetadata('title',stripHTML(soup.find('h1')).replace(
-            'by '+self.story.getMetadata('author'), '').strip())
+        rawtitle = stripHTML(soup.find('h1')).replace(
+            'by '+self.story.getMetadata('author'), '').strip()
+        self.story.setMetadata('title',rawtitle)

        # Find the chapters:
        for chapter in soup.find('select').find_all('option', value=re.compile(
@ -148,12 +149,12 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):

            story_found = False
            for story in asoup.find('ul', {'id':'fic_list'}).find_all('li'):
-                if self.story.getMetadata('title') == stripHTML(story.a):
+                if rawtitle == stripHTML(story.a):
                    story_found = True
                    break
                else:
                    story_found = False
-            
+
            if not story_found:
                raise exceptions.StoryDoesNotExist("Cannot find story '{}' on author's page '{}'".format(
                    url, self.story.getMetadata('authorUrl')))
@ -200,7 +201,7 @@ class UnknowableRoomOrgSiteAdapter(BaseSiteAdapter):
                        'rd,', ',').replace('th,', ',').replace('.', '').strip()
                    self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))

-        # I'm going to add the disclaimer 
+        # I'm going to add the disclaimer
        disclaimer = soup.find('strong', {'id':'disclaimer'})
        if disclaimer:
            self.story.setMetadata('disclaimer', stripHTML(disclaimer).replace(