mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 12:36:11 +02:00
adapter_ficbooknet: Add replace_text_formatting option to replace CSS paragraphing with tags, for txt output.
This commit is contained in:
parent
6963153aac
commit
9a52a10626
5 changed files with 69 additions and 9 deletions
|
|
@ -2061,6 +2061,22 @@ add_to_comma_entries:,likes,follows,reviews,numAwards,numCollections
|
|||
datePublished_format:%%Y-%%m-%%d %%H:%%M
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M
|
||||
|
||||
## Ficbook chapters can include headnotes and footnotes. We've
|
||||
## traditionally included them all in the chapter text, but this
|
||||
## allows you to customize which you include. Copy this parameter to
|
||||
## your personal.ini and list the ones you don't want.
|
||||
#exclude_notes:headnotes,footnotes
|
||||
|
||||
[ficbook.net:txt]
|
||||
## ficbook uses CSS white-space: pre-wrap instead of tags for
|
||||
## paragraph breaks. This doesn't carry over to txt output. This
|
||||
## site-specific feature replaces \n with <br/> in chapter text and
|
||||
## headnotes,footnotes only. Only applied if the known white-space
|
||||
## classes are present and removes those classes.
|
||||
## Can also be used in [ficbook.net] or [ficbook.net:epub] if you want
|
||||
## to download epub and convert to text.
|
||||
replace_text_formatting:true
|
||||
|
||||
[fiction.live]
|
||||
## Recommended if you include images, fiction.live tends to have many
|
||||
## duplicated images.
|
||||
|
|
|
|||
|
|
@ -317,6 +317,15 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
if cover is not None:
|
||||
self.setCoverImage(url,cover['src-original'])
|
||||
|
||||
def replace_formatting(self,tag):
    """
    Return a freshly parsed element, named like tag, whose content is
    the stripHTML() text of tag with every newline turned into <br/>.

    The result comes from a new soup built by self.make_soup(); only
    the tag name and that text are fed into it.
    """
    ## Work on the plain-text rendering rather than the BS4 tree --
    ## text is awkward to manipulate in place with BS4.
    element = tag.name
    body = "<br/>".join(stripHTML(tag).split("\n"))
    rebuilt = self.make_soup("<%s>%s</%s>" % (element, body, element))
    return rebuilt.find(element)
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
|
||||
|
|
@ -331,18 +340,30 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
if chapter is None:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
## ficbook uses weird CSS white-space: pre-wrap; for
|
||||
## paragraphing. Doesn't work with txt output
|
||||
if 'part_text' in chapter['class'] and self.getConfig('replace_text_formatting'):
|
||||
## copy classes, except part_text
|
||||
divclasses = chapter['class']
|
||||
divclasses.remove('part_text')
|
||||
chapter = self.replace_formatting(chapter)
|
||||
chapter['class'] = divclasses
|
||||
|
||||
exclude_notes=self.getConfigList('exclude_notes')
|
||||
if 'headnotes' not in exclude_notes:
|
||||
# Find the headnote
|
||||
head_note = soup.find('div', {'class': 'part-comment-top'})
|
||||
head_note = soup.select_one("div.part-comment-top div.js-public-beta-comment-before")
|
||||
if head_note:
|
||||
head_notes_content = head_note.find('div', {'class': 'js-public-beta-comment-before'}).get_text(strip=True)
|
||||
# Create the structure for the headnote
|
||||
head_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_head_notes'})
|
||||
head_b_tag = soup.new_tag('b')
|
||||
head_b_tag.string = 'Примечания:'
|
||||
head_blockquote_tag = soup.new_tag('blockquote')
|
||||
head_blockquote_tag.string = head_notes_content
|
||||
if 'text-preline' in head_note['class'] and self.getConfig('replace_text_formatting'):
|
||||
head_blockquote_tag = self.replace_formatting(head_note)
|
||||
head_blockquote_tag.name = 'blockquote'
|
||||
else:
|
||||
head_blockquote_tag = soup.new_tag('blockquote')
|
||||
head_blockquote_tag.string = stripHTML(head_note)
|
||||
head_notes_div_tag.append(head_b_tag)
|
||||
head_notes_div_tag.append(head_blockquote_tag)
|
||||
# Prepend the headnotes to the chapter, <hr> to mimic the site
|
||||
|
|
@ -351,15 +372,18 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
|
||||
if 'footnotes' not in exclude_notes:
|
||||
# Find the endnote
|
||||
end_note = soup.find('div', {'class': 'part-comment-bottom'})
|
||||
end_note = soup.select_one("div.part-comment-bottom div.js-public-beta-comment-after")
|
||||
if end_note:
|
||||
end_notes_content = end_note.find('div', {'class': 'js-public-beta-comment-after'}).get_text(strip=True)
|
||||
# Create the structure for the footnote
|
||||
end_notes_div_tag = soup.new_tag('div', attrs={'class': 'fff_chapter_notes fff_foot_notes'})
|
||||
end_b_tag = soup.new_tag('b')
|
||||
end_b_tag.string = 'Примечания:'
|
||||
end_blockquote_tag = soup.new_tag('blockquote')
|
||||
end_blockquote_tag.string = end_notes_content
|
||||
if 'text-preline' in end_note['class'] and self.getConfig('replace_text_formatting'):
|
||||
end_blockquote_tag = self.replace_formatting(end_note)
|
||||
end_blockquote_tag.name = 'blockquote'
|
||||
else:
|
||||
end_blockquote_tag = soup.new_tag('blockquote')
|
||||
end_blockquote_tag.string = stripHTML(end_note)
|
||||
end_notes_div_tag.append(end_b_tag)
|
||||
end_notes_div_tag.append(end_blockquote_tag)
|
||||
# Append the endnotes to the chapter, <hr> to mimic the site
|
||||
|
|
|
|||
|
|
@ -338,6 +338,7 @@ def get_valid_set_options():
|
|||
'show_nsfw_cover_images': (['fiction.live'], None, boollist),
|
||||
'show_timestamps': (['fiction.live'], None, boollist),
|
||||
'prepend_section_titles': (['syosetu.com','kakuyomu.jp'], None, boollist+['firstepisode']),
|
||||
'replace_text_formatting':(['ficbook.net'], None, boollist),
|
||||
}
|
||||
|
||||
return dict(valdict)
|
||||
|
|
@ -631,6 +632,7 @@ def get_valid_keywords():
|
|||
'decode_emails',
|
||||
'epub_version',
|
||||
'prepend_section_titles',
|
||||
'replace_text_formatting',
|
||||
])
|
||||
|
||||
# *known* entry keywords -- or rather regexps for them.
|
||||
|
|
|
|||
|
|
@ -2054,6 +2054,22 @@ add_to_comma_entries:,likes,follows,reviews,numAwards,numCollections
|
|||
datePublished_format:%%Y-%%m-%%d %%H:%%M
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M
|
||||
|
||||
## Ficbook chapters can include headnotes and footnotes. We've
|
||||
## traditionally included them all in the chapter text, but this
|
||||
## allows you to customize which you include. Copy this parameter to
|
||||
## your personal.ini and list the ones you don't want.
|
||||
#exclude_notes:headnotes,footnotes
|
||||
|
||||
[ficbook.net:txt]
|
||||
## ficbook uses CSS white-space: pre-wrap instead of tags for
|
||||
## paragraph breaks. This doesn't carry over to txt output. This
|
||||
## site-specific feature replaces \n with <br/> in chapter text and
|
||||
## headnotes,footnotes only. Only applied if the known white-space
|
||||
## classes are present and removes those classes.
|
||||
## Can also be used in [ficbook.net] or [ficbook.net:epub] if you want
|
||||
## to download epub and convert to text.
|
||||
replace_text_formatting:true
|
||||
|
||||
[fiction.live]
|
||||
## Recommended if you include images, fiction.live tends to have many
|
||||
## duplicated images.
|
||||
|
|
|
|||
|
|
@ -59,7 +59,9 @@ followsects = [
|
|||
|
||||
with open(argv[1],"w", encoding="utf8") as outfile:
|
||||
kl = list(sections.keys())
|
||||
kl.sort()
|
||||
# to force [site:format] after [site]
|
||||
kl.sort(key=lambda x : x.replace(']',''))
|
||||
# print(kl)
|
||||
for k in leadsects:
|
||||
if k in sections:
|
||||
outfile.write("".join(sections[k]))
|
||||
|
|
|
|||
Loading…
Reference in a new issue