Mirror of https://github.com/JimmXinu/FanFicFare.git
Add decode_emails option, defaults to false.
commit 832387dea0
parent 94bd4bf236
5 changed files with 56 additions and 1 deletion
@@ -677,6 +677,10 @@ chapter_title_error_mark:(CHAPTER ERROR)
 ## single marks and is the recommended setting if you use it.
 #max_zalgo:1
 
+## Some sites use a common obfuscation of email addresses. Set
+## decode_emails:true for FFF to attempt to decode them.
+decode_emails:false
+
 ## Apply adapter's normalize_chapterurl() to all links in chapter
 ## texts, if they match the known pattern(s) for chapter URLs. As of
 ## writing, base_xenforoforum, adapter_archiveofourownorg &
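
The shipped default stays off, so a user who wants the behavior turns it on in their own config rather than editing the shipped defaults. A minimal sketch, assuming the usual personal.ini layout where a [defaults] section applies to every site:

    [defaults]
    ## ask FFF to try to de-obfuscate protected email addresses in story text
    decode_emails:true
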
@@ -39,7 +39,7 @@ logger = logging.getLogger(__name__)
 
 from ..story import Story
 from ..requestable import Requestable
-from ..htmlcleanup import stripHTML
+from ..htmlcleanup import stripHTML, decode_email
 from ..exceptions import InvalidStoryURL, StoryDoesNotExist, HTTPErrorFFF
 
 # was defined here before, imported for all the adapters that still
@@ -657,6 +657,32 @@ class BaseSiteAdapter(Requestable):
         if not fetch:
             fetch=self.get_request_raw
 
+        if self.getConfig("decode_emails"):
+            # <a href="/cdn-cgi/l/email-protection" class="__cf_email__" data-cfemail="c7ada8afa9a3a8a287a2aaa6aeabe9a4a8aa">[email protected]</a>
+            # <a href="/cdn-cgi/l/email-protection#e3a18f8a8d87ae8c969086d2d7d0a3b3abac8d869790cd8c9184"><span class="__cf_email__" data-cfemail="296b4540474d64465c5a4c181d1a69796166474c5d5a07465b4e">[email protected]</span></a>
+            for emailtag in soup.select('a.__cf_email__') + soup.select('span.__cf_email__'):
+                tagtext = '(tagtext not set yet)'
+                try:
+                    tagtext = unicode(emailtag)
+                    emaildata = emailtag['data-cfemail']
+                    if not emaildata:
+                        continue
+                    addr = decode_email(emaildata)
+                    repltag = emailtag
+                    if( emailtag.name == 'span' and
+                        emailtag.parent.name == 'a' and
+                        emailtag.parent['href'].startswith('/cdn-cgi/l/email-protection') ):
+                        repltag = emailtag.parent
+                    repltag.name='span'
+                    if repltag.has_attr('href'):
+                        del repltag['href']
+                    repltag['class']='decoded_email'
+                    repltag.string = addr
+                except Exception as e:
+                    logger.info("decode_emails failed on (%s)"%tagtext)
+                    logger.info(e)
+                    logger.debug(traceback.format_exc())
+
         acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id','data-orighref'])
 
         if self.getConfig("keep_style_attr"):
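
To make the rewrite concrete, here is a small standalone BeautifulSoup sketch of what the loop above does to one protected tag. The markup, address, and data-cfemail value are made-up placeholders for illustration; real input would carry a genuine hex string for decode_email() to work on.

    from bs4 import BeautifulSoup

    # fabricated example of the protected markup the loop targets
    soup = BeautifulSoup('<p>Contact: <a href="/cdn-cgi/l/email-protection" '
                         'class="__cf_email__" data-cfemail="...">[protected]</a></p>',
                         'html.parser')
    tag = soup.select_one('a.__cf_email__')
    tag.name = 'span'                   # the anchor becomes a plain span
    del tag['href']                     # drop the protection URL
    tag['class'] = 'decoded_email'      # mark it for CSS or debugging
    tag.string = 'someone@example.com'  # stand-in for decode_email(tag['data-cfemail'])
    print(tag)
    # prints something like:
    #   <span class="decoded_email" data-cfemail="...">someone@example.com</span>
    # (in FFF the keep_html_attrs pass visible just below this hunk later strips
    #  attributes such as data-cfemail that are not in the accepted list)
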
@@ -205,6 +205,7 @@ def get_valid_set_options():
         'remove_class_chapter':(None,None,boollist),
         'mark_new_chapters':(None,None,boollist+['latestonly']),
         'titlepage_use_table':(None,None,boollist),
+        'decode_emails':(None,None,boollist),
 
         'use_ssl_unverified_context':(None,None,boollist),
         'use_ssl_default_seclevelone':(None,None,boollist),
@@ -584,6 +585,7 @@ def get_valid_keywords():
         'show_nsfw_cover_images',
         'show_spoiler_tags',
         'max_zalgo',
+        'decode_emails',
         'epub_version',
         'prepend_section_titles',
         ])
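
The two registrations above cover the ini-validation side of the change: get_valid_set_options() constrains decode_emails to the boolean values in boollist, and get_valid_keywords() adds it to the list of recognized setting names, so a personal.ini that uses the new option passes FFF's config checks.
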
@@ -678,6 +678,10 @@ chapter_title_error_mark:(CHAPTER ERROR)
 ## single marks and is the recommended setting if you use it.
 #max_zalgo:1
 
+## Some sites use a common obfuscation of email addresses. Set
+## decode_emails:true for FFF to attempt to decode them.
+decode_emails:false
+
 ## Apply adapter's normalize_chapterurl() to all links in chapter
 ## texts, if they match the known pattern(s) for chapter URLs. As of
 ## writing, base_xenforoforum, adapter_archiveofourownorg &
@@ -22,10 +22,16 @@ logger = logging.getLogger(__name__)
 import re
 
 # py2 vs py3 transition
+from .six.moves.urllib.parse import unquote
 from .six import text_type as unicode
 from .six import string_types as basestring
 from .six import ensure_text
 from .six import unichr
+from .six import PY2
+if PY2:
+    from cgi import escape as htmlescape
+else: # PY3
+    from html import escape as htmlescape
 
 def _unirepl(match):
     "Return the unicode string for a decimal number"
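
These imports exist for the decode_email() helper added further below: unquote undoes any percent-encoding left in the decoded string, and the PY2/PY3 branch picks the matching escape() so the recovered address comes back HTML-escaped.
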
@@ -179,6 +185,19 @@ def reduce_zalgo(text,max_zalgo=1):
             count+=1
     return ''.join(lineout)
 
+def parse_hex(n, c):
+    r = n[c:c+2]
+    return int(r, 16)
+
+def decode_email(n, c=0):
+    o = ""
+    a = parse_hex(n, c)
+    for i in range(c + 2, len(n), 2):
+        l = parse_hex(n, i) ^ a
+        o += chr(l)
+    o = unquote(o)
+    return htmlescape(o)
+
 # entity list from http://code.google.com/p/doctype/wiki/CharacterEntitiesConsistent
 entities = { '&aacute;' : 'á',
              '&Aacute;' : 'Á',
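
As a quick sanity check of the XOR scheme, a hand-made example (the hex string below was constructed for this note and does not come from the commit): the first byte is the key, and each following byte XORed with it yields one character of the address. Assuming the helper lands in fanficfare/htmlcleanup.py as the relative imports above suggest:

    >>> from fanficfare.htmlcleanup import decode_email
    >>> decode_email('422a2b023a6c212d2f')   # key 0x42; 0x2a^0x42 = 'h', 0x2b^0x42 = 'i', ...
    'hi@x.com'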