Add AO3 feature use_view_full_work -- true by default.

This commit is contained in:
Jim Miller 2017-04-29 17:11:17 -05:00
parent 33c33b602f
commit 5afac3ce5b
4 changed files with 70 additions and 30 deletions

View file

@ -871,6 +871,11 @@ website_encodings:Windows-1252,utf8
## personal.ini, not defaults.ini.
#is_adult:true
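## AO3 offers a page that shows the entire work at once. When
## use_view_full_work is true, FanFicFare downloads that one page
## and takes every chapter from it instead of fetching each chapter
## separately. This can save a lot of network traffic if there are
## many chapters. True by built-in default.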
#use_view_full_work:true
## archiveofourown.org stories allow chapters to be added out of
## order. So the newest chapter may not be the last one. FanFicFare update
## doesn't like that. If do_update_hook is uncommented and set true,

View file

@ -41,10 +41,11 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
self.password = ""
self.is_adult=False
self.full_work_soup = None
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
# get storyId from url--url validation guarantees query correct
m = re.match(self.getSiteURLPattern(),url)
if m:
@ -355,65 +356,92 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
return len(self.oldchapters)
# grab the text for an individual chapter.
def getChapterTextNum(self, url, index):
    """Build the saved HTML for one chapter, including optional notes.

    When the 'use_view_full_work' setting is true (the default used
    here), the whole work is fetched once from
    self.url+"?view_full_work=true", cached in self.full_work_soup, and
    the chapter is taken from its 'chapter-N' div.  Otherwise the
    individual chapter page at url is fetched.

    url   -- URL of the individual chapter page.
    index -- zero-based chapter index; index 0 is treated as the first
             chapter and index+1 == len(self.chapterUrls) as the last.

    Sections named in the 'exclude_notes' setting (authorheadnotes,
    chaptersummary, chapterheadnotes, chapterfootnotes,
    authorfootnotes) are left out.

    Returns whatever self.utf8FromSoup(url, <assembled div>) returns.

    Raises exceptions.FailedToDownload when the chapter div or the
    chapter's main text div cannot be found.
    """
    logger.debug('Getting chapter text for: %s index: %s' % (url,index))

    save_chapter_soup = self.make_soup('<div class="story"></div>')
    ## use the div because the full soup will also have <html><body>.
    ## need save_chapter_soup for .new_tag()
    save_chapter = save_chapter_soup.find('div')

    if self.getConfig("use_view_full_work",True):
        logger.debug("USE view_full_work")
        ## Assumed view_adult=true was cookied during metadata
        if not self.full_work_soup:
            self.full_work_soup = self.make_soup(self._fetchUrl(self.url+"?view_full_work=true"))
        whole_dl_soup = self.full_work_soup
        chapter_dl_soup = whole_dl_soup.find('div',{'id':'chapter-%s'%(index+1)})
        if not chapter_dl_soup:
            raise exceptions.FailedToDownload("chapter-%s not found in view_full_work"%(index+1))
    else:
        whole_dl_soup = chapter_dl_soup = self.make_soup(self._fetchUrl(url))

    if chapter_dl_soup is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

    exclude_notes = self.getConfigList('exclude_notes')

    def append_tag(elem,tag,string):
        '''bs4 requires tags be added separately.'''
        new_tag = save_chapter_soup.new_tag(tag)
        new_tag.string = string
        elem.append(new_tag)

    def append_section(label, container, attrs=None):
        '''Append a bold label plus container's blockquote, if present.

        Does nothing when container is None or has no matching
        blockquote, so a heading is never emitted without content and
        None is never passed to Tag.append().
        '''
        if container is None:
            return
        quote = container.find('blockquote', attrs or {})
        if quote is None:
            return
        append_tag(save_chapter,'b',label)
        save_chapter.append(quote)

    ## These are the over-all work's 'Notes at the beginning'.
    ## They only appear on the first chapter in individual chapter
    ## pages and before chapter-1 div.  Appending removes
    ## headnotes from whole_dl_soup, so be sure to only do it on
    ## the first chapter.
    if 'authorheadnotes' not in exclude_notes and index == 0:
        preface = whole_dl_soup.find('div', {'class' : "preface group"})
        if preface is not None:
            append_section("Author's Note:",
                           preface.find('div', {'class' : "notes module"}),
                           {'class' : "userstuff"})

    ## Can appear on every chapter
    if 'chaptersummary' not in exclude_notes:
        append_section("Summary for the Chapter:",
                       chapter_dl_soup.find('div', {'id' : "summary"}))

    ## Can appear on every chapter
    if 'chapterheadnotes' not in exclude_notes:
        append_section("Notes for the Chapter:",
                       chapter_dl_soup.find('div', {'id' : "notes"}))

    text = chapter_dl_soup.find('div', {'class' : "userstuff module"})
    if text is None:
        ## The chapter body is the one required element; report it as
        ## a download failure instead of an AttributeError below.
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
    chtext = text.find('h3', {'class' : "landmark heading"})
    if chtext:
        chtext.extract()
    save_chapter.append(text)

    ## Can appear on every chapter
    if 'chapterfootnotes' not in exclude_notes:
        append_section("Notes for the Chapter:",
                       chapter_dl_soup.find('div', {'class' : "end notes module", 'role' : "complementary"}))

    ## These are the over-all work's 'Notes at the end'.
    ## They only appear on the last chapter in individual chapter
    ## pages and after chapter-# div.  Appending removes
    ## footnotes from whole_dl_soup, so be sure to only do it on
    ## the last chapter.
    if 'authorfootnotes' not in exclude_notes and index+1 == len(self.chapterUrls):
        append_section("Author's Note:",
                       whole_dl_soup.find('div', {'id' : "work_endnotes"}))

    return self.utf8FromSoup(url,save_chapter)

View file

@ -200,6 +200,7 @@ def get_valid_set_options():
'archiveofourown.org'],None,boollist),
'always_login':(['archiveofourown.org'],None,boollist),
'use_archived_author':(['archiveofourown.org'],None,boollist),
'use_view_full_work':(['archiveofourown.org'],None,boollist),
'force_login':(['phoenixsong.net'],None,boollist),
'non_breaking_spaces':(['fictionmania.tv'],None,boollist),
@ -322,6 +323,7 @@ def get_valid_keywords():
'description_limit',
'do_update_hook',
'use_archived_author',
'use_view_full_work',
'always_login',
'exclude_notes',
'exclude_editor_signature',

View file

@ -920,6 +920,11 @@ website_encodings:Windows-1252,utf8
## personal.ini, not defaults.ini.
#is_adult:true
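## AO3 offers a page that shows the entire work at once. When
## use_view_full_work is true, FanFicFare downloads that one page
## and takes every chapter from it instead of fetching each chapter
## separately. This can save a lot of network traffic if there are
## many chapters. True by built-in default.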
#use_view_full_work:true
## archiveofourown.org stories allow chapters to be added out of
## order. So the newest chapter may not be the last one. FanFicFare update
## doesn't like that. If do_update_hook is uncommented and set true,