Fix escaping of stories with richly formatted (HTML) chapter content (#238)

Issue #237
This commit is contained in:
Chris Braun 2017-10-18 16:05:58 +02:00 committed by Jim Miller
parent 2cbdea1f8b
commit 0b4787ef3a

View file

@ -16,8 +16,6 @@
#
# Adapted by GComyn on April 16, 2017
import HTMLParser
import cgi
import json
import logging
import re
@ -31,7 +29,6 @@ from ..htmlcleanup import stripHTML
UNIX_EPOCHE = datetime.fromtimestamp(0)
logger = logging.getLogger(__name__)
_html_parser = HTMLParser.HTMLParser()
def getClass():
@ -76,9 +73,6 @@ def _parse_relative_date_string(string_):
return today - time_ago
_unescape_html = _html_parser.unescape
class WWWWebNovelComAdapter(BaseSiteAdapter):
_GET_VIP_CONTENT_DELAY = 8
@ -216,10 +210,10 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
else:
content = chapter_info['content']
# First unescape all HTML entities in the chapter content and then escape them again: we can't be sure whether
# webnovel.com has already processed the HTML entities (this seems to vary on a story-by-story basis)
content = cgi.escape(_unescape_html(content))
# Content is HTML, so return it directly
if chapter_info['isRichFormat']:
return content
# Turn raw chapter text into HTML
content = content.replace('\r', '').replace('\n', '<br />')
return content
# Content is raw text, so convert paired newlines into paragraphs like the website
content = content.replace('\r', '')
return re.sub(r'\n(.+?)\n', r'<p>\1</p>', content)