mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-01-09 01:22:57 +01:00
Fix escaping of stories with richly formatted (HTML) chapter content (#238)
Issue #237
This commit is contained in:
parent
2cbdea1f8b
commit
0b4787ef3a
1 changed file with 6 additions and 12 deletions
|
|
@ -16,8 +16,6 @@
|
|||
#
|
||||
|
||||
# Adapted by GComyn on April 16, 2017
|
||||
import HTMLParser
|
||||
import cgi
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
|
@ -31,7 +29,6 @@ from ..htmlcleanup import stripHTML
|
|||
|
||||
UNIX_EPOCHE = datetime.fromtimestamp(0)
|
||||
logger = logging.getLogger(__name__)
|
||||
_html_parser = HTMLParser.HTMLParser()
|
||||
|
||||
|
||||
def getClass():
|
||||
|
|
@ -76,9 +73,6 @@ def _parse_relative_date_string(string_):
|
|||
return today - time_ago
|
||||
|
||||
|
||||
_unescape_html = _html_parser.unescape
|
||||
|
||||
|
||||
class WWWWebNovelComAdapter(BaseSiteAdapter):
|
||||
_GET_VIP_CONTENT_DELAY = 8
|
||||
|
||||
|
|
@ -216,10 +210,10 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
content = chapter_info['content']
|
||||
|
||||
# First unescape all HTML entities in the chapter content and then escape them again: we can't be sure if
|
||||
# webnovel.com has processed the HTML entities already or not (seemingly story-by-story basis)
|
||||
content = cgi.escape(_unescape_html(content))
|
||||
# Content is HTML, so return it directly
|
||||
if chapter_info['isRichFormat']:
|
||||
return content
|
||||
|
||||
# Turn raw chapter text into HTML
|
||||
content = content.replace('\r', '').replace('\n', '<br />')
|
||||
return content
|
||||
# Content is raw text, so convert paired newlines into paragraphs like the website
|
||||
content = content.replace('\r', '')
|
||||
return re.sub(r'\n(.+?)\n', r'<p>\1</p>', content)
|
||||
|
|
|
|||
Loading…
Reference in a new issue