Add use_archived_author option for archiveofourown.org.

2025-12-26 02:35:32 +01:00 · 2017-04-05 12:27:21 -05:00 · 2017-04-05 12:27:21 -05:00 · 06aebc1707
commit 06aebc1707
parent 7b98e41c9b
5 changed files with 51 additions and 22 deletions
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@ -916,6 +916,14 @@ include_in_freefromtags:freeformtags
 ## stories less often you can likely get by with reducing this sleep.
 slow_down_sleep_time:2

+## AO3 allows users to archive stories they didn't write in certain
+## cases.  Such stories are indicated by a byline such as:
+## 'Orig Author [archived by Archivist Author]'
+## If use_archived_author is set to true (and only one author is
+## listed), author will be set to 'Orig Author' instead of 'Archivist
+## Author'.  authorUrl will still point to the Archivist Author's page.
+#use_archived_author:false
+
 [ashwinder.sycophanthex.com]
 ## Some sites require login (or login for some rated stories) The
 ## program can prompt you, or you can save it in config.  In
--- a/fanficfare/adapters/adapter_archiveofourownorg.py
+++ b/fanficfare/adapters/adapter_archiveofourownorg.py
@ -170,7 +170,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
        ## Title
        a = soup.find('a', href=re.compile(r"/works/\d+$"))
        self.story.setMetadata('title',stripHTML(a))
-		
+
        # Find authorid and URL from... author url.
        alist = soup.findAll('a', href=re.compile(r"/users/\w+/pseuds/\w+"))
        if len(alist) < 1: # ao3 allows for author 'Anonymous' with no author link.
@ -187,6 +187,18 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
        if byline:
            self.story.setMetadata('byline',stripHTML(byline))

+        # Archived works carry a byline of this form:
+        # <h3 class="byline heading">
+        # Hope Roy [archived by <a href="/users/ssa_archivist/pseuds/ssa_archivist" rel="author">ssa_archivist</a>]
+        # </h3>
+        # which stripHTML() reduces to: "Hope Roy [archived by ssa_archivist]"
+
+        m = re.match(r'(?P<author>.*) \[archived by (?P<archivist>.*)\]',stripHTML(byline))
+        if( m and
+            len(alist) == 1 and
+            self.getConfig('use_archived_author') ):
+            self.story.setMetadata('author',m.group('author'))
+
        newestChapter = None
        self.newestChapterNum = None # save for comparing during update.
        # Scan all chapters to find the oldest and newest, on AO3 it's
@ -214,26 +226,26 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
        if a != None:
            self.setDescription(url,a)
            #self.story.setMetadata('description',a.text)
-		
+
        a = metasoup.find('dd',{'class':"rating tags"})
        if a != None:
            self.story.setMetadata('rating',stripHTML(a.text))
-		
+
        d = metasoup.find('dd',{'class':"language"})
        if d != None:
            self.story.setMetadata('language',stripHTML(d.text))
-		
+
        a = metasoup.find('dd',{'class':"fandom tags"})
        fandoms = a.findAll('a',{'class':"tag"})
        for fandom in fandoms:
            self.story.addToList('fandoms',fandom.string)
-		
+
        a = metasoup.find('dd',{'class':"warning tags"})
        if a != None:
            warnings = a.findAll('a',{'class':"tag"})
            for warning in warnings:
                self.story.addToList('warnings',warning.string)
-		
+
        a = metasoup.find('dd',{'class':"freeform tags"})
        if a != None:
            genres = a.findAll('a',{'class':"tag"})
@ -246,7 +258,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
            for genre in genres:
                if genre != "Gen":
                    self.story.addToList('ao3categories',genre.string)
-		
+
        a = metasoup.find('dd',{'class':"character tags"})
        if a != None:
            chars = a.findAll('a',{'class':"tag"})
@ -258,13 +270,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
            ships = a.findAll('a',{'class':"tag"})
            for ship in ships:
                self.story.addToList('ships',ship.string)
-		
+
        a = metasoup.find('dd',{'class':"collections"})
        if a != None:
            collections = a.findAll('a')
            for collection in collections:
                self.story.addToList('collections',collection.string)
-		
+
        stats = metasoup.find('dl',{'class':'stats'})
        dt = stats.findAll('dt')
        dd = stats.findAll('dd')
@ -274,19 +286,19 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):

            if 'Words:' in label:
                self.story.setMetadata('numWords', value)
-				
+
            if 'Comments:' in label:
                self.story.setMetadata('comments', value)
-				
+
            if 'Kudos:' in label:
                self.story.setMetadata('kudos', value)
-				
+
            if 'Hits:' in label:
                self.story.setMetadata('hits', value)
-				
+
            if 'Bookmarks:' in label:
                self.story.setMetadata('bookmarks', value)
-				
+
            if 'Chapters:' in label:
                if value.split('/')[0] == value.split('/')[1]:
                    self.story.setMetadata('status', 'Completed')
@ -300,11 +312,11 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
-				
+
            if 'Completed' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

-		
+
        # Find Series name from series URL.
        ddseries = metasoup.find('dd',{'class':"series"})

@ -328,7 +340,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
    # grab the text for an individual chapter.
    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)
-		
+
        chapter=self.make_soup('<div class="story"></div>').find('div')
        data = self._fetchUrl(url)
        soup = self.make_soup(data)
@ -363,27 +375,27 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
                if chapnotes != None:
                    append_tag(chapter,'b',"Notes for the Chapter:")
                    chapter.append(chapnotes)
-		
+
        text = soup.find('div', {'class' : "userstuff module"})
        chtext = text.find('h3', {'class' : "landmark heading"})
        if chtext:
            chtext.extract()
        chapter.append(text)
-		
+
        if 'chapterfootnotes' not in exclude_notes:
            chapfoot = soup.find('div', {'class' : "end notes module", 'role' : "complementary"})
            if chapfoot != None:
                chapfoot = chapfoot.find('blockquote')
                append_tag(chapter,'b',"Notes for the Chapter:")
                chapter.append(chapfoot)
-		
+
        if 'authorfootnotes' not in exclude_notes:
            footnotes = soup.find('div', {'id' : "work_endnotes"})
            if footnotes != None:
                footnotes = footnotes.find('blockquote')
                append_tag(chapter,'b',"Author's Note:")
                chapter.append(footnotes)
-			
+
        if None == soup:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@ -197,6 +197,7 @@ def get_valid_set_options():
               'fail_on_password':(['fimfiction.net'],None,boollist),
               'do_update_hook':(['fimfiction.net',
                                  'archiveofourown.org'],None,boollist),
+               'use_archived_author':(['archiveofourown.org'],None,boollist),

               'force_login':(['phoenixsong.net'],None,boollist),
               'non_breaking_spaces':(['fictionmania.tv'],None,boollist),
@ -315,6 +316,7 @@ def get_valid_keywords():
                 'default_cover_image',
                 'description_limit',
                 'do_update_hook',
+                 'use_archived_author',
                 'exclude_notes',
                 'exclude_editor_signature',
                 'extra_logpage_entries',
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@ -961,6 +961,14 @@ include_in_freefromtags:freeformtags
 ## stories less often you can likely get by with reducing this sleep.
 slow_down_sleep_time:2

+## AO3 allows users to archive stories they didn't write in certain
+## cases.  Such stories are indicated by a byline such as:
+## 'Orig Author [archived by Archivist Author]'
+## If use_archived_author is set to true (and only one author is
+## listed), author will be set to 'Orig Author' instead of 'Archivist
+## Author'.  authorUrl will still point to the Archivist Author's page.
+#use_archived_author:false
+
 [ashwinder.sycophanthex.com]
 ## Some sites require login (or login for some rated stories) The
 ## program can prompt you, or you can save it in config.  In
--- a/fanficfare/story.py
+++ b/fanficfare/story.py
@ -760,7 +760,6 @@ class Story(Configurable):
                    auth=removeAllEntities(auth)

                htmllist.append(linkhtml%('author',aurl,auth))
-            # join_string = self.getConfig("join_string_authorHTML",u", ").replace(SPACE_REPLACE,' ')
            self.setMetadata('authorHTML',self.join_list("join_string_authorHTML",htmllist))
        else:
            self.setMetadata('authorHTML',linkhtml%('author',self.getMetadata('authorUrl', removeallentities, doreplacements),