diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini
index 76cf9149..79c1e2fa 100644
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@@ -677,6 +677,10 @@ chapter_title_error_mark:(CHAPTER ERROR)
## single marks and is the recommended setting if you use it.
#max_zalgo:1
+## Some sites use a common obfuscation of email addresses. Set
+## decode_emails:true for FFF to attempt to decode them.
+decode_emails:false
+
## Apply adapter's normalize_chapterurl() to all links in chapter
## texts, if they match the known pattern(s) for chapter URLs. As of
## writing, base_xenforoforum, adapter_archiveofourownorg &
diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py
index 45f8a1b2..aadce29f 100644
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@@ -39,7 +39,7 @@ logger = logging.getLogger(__name__)
from ..story import Story
from ..requestable import Requestable
-from ..htmlcleanup import stripHTML
+from ..htmlcleanup import stripHTML, decode_email
from ..exceptions import InvalidStoryURL, StoryDoesNotExist, HTTPErrorFFF
# was defined here before, imported for all the adapters that still
@@ -657,6 +657,32 @@ class BaseSiteAdapter(Requestable):
if not fetch:
fetch=self.get_request_raw
+ if self.getConfig("decode_emails"):
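+ # <a class="__cf_email__" data-cfemail="(hex)">[email protected]</a>
+ # <a href="/cdn-cgi/l/email-protection..."><span class="__cf_email__" data-cfemail="(hex)">[email protected]</span></a>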
+ for emailtag in soup.select('a.__cf_email__') + soup.select('span.__cf_email__'):
+ tagtext = '(tagtext not set yet)'
+ try:
+ tagtext = unicode(emailtag)
+ emaildata = emailtag['data-cfemail']
+ if not emaildata:
+ continue
+ addr = decode_email(emaildata)
+ repltag = emailtag
+ if( emailtag.name == 'span' and
+ emailtag.parent.name == 'a' and
+ emailtag.parent['href'].startswith('/cdn-cgi/l/email-protection') ):
+ repltag = emailtag.parent
+ repltag.name='span'
+ if repltag.has_attr('href'):
+ del repltag['href']
+ repltag['class']='decoded_email'
+ repltag.string = addr
+ except Exception as e:
+ logger.info("decode_emails failed on (%s)"%tagtext)
+ logger.info(e)
+ logger.debug(traceback.format_exc())
+
acceptable_attributes = self.getConfigList('keep_html_attrs',['href','name','class','id','data-orighref'])
if self.getConfig("keep_style_attr"):
diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py
index f51518b1..c6438fc3 100644
--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@@ -205,6 +205,7 @@ def get_valid_set_options():
'remove_class_chapter':(None,None,boollist),
'mark_new_chapters':(None,None,boollist+['latestonly']),
'titlepage_use_table':(None,None,boollist),
+ 'decode_emails':(None,None,boollist),
'use_ssl_unverified_context':(None,None,boollist),
'use_ssl_default_seclevelone':(None,None,boollist),
@@ -584,6 +585,7 @@ def get_valid_keywords():
'show_nsfw_cover_images',
'show_spoiler_tags',
'max_zalgo',
+ 'decode_emails',
'epub_version',
'prepend_section_titles',
])
diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini
index fb5a1cd8..2d85877d 100644
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@@ -678,6 +678,10 @@ chapter_title_error_mark:(CHAPTER ERROR)
## single marks and is the recommended setting if you use it.
#max_zalgo:1
+## Some sites use a common obfuscation of email addresses. Set
+## decode_emails:true for FFF to attempt to decode them.
+decode_emails:false
+
## Apply adapter's normalize_chapterurl() to all links in chapter
## texts, if they match the known pattern(s) for chapter URLs. As of
## writing, base_xenforoforum, adapter_archiveofourownorg &
diff --git a/fanficfare/htmlcleanup.py b/fanficfare/htmlcleanup.py
index 5e19ba81..4161bfc3 100644
--- a/fanficfare/htmlcleanup.py
+++ b/fanficfare/htmlcleanup.py
@@ -22,10 +22,16 @@ logger = logging.getLogger(__name__)
import re
# py2 vs py3 transition
+from .six.moves.urllib.parse import unquote
from .six import text_type as unicode
from .six import string_types as basestring
from .six import ensure_text
from .six import unichr
+from .six import PY2
+if PY2:
+ from cgi import escape as htmlescape
+else: # PY3
+ from html import escape as htmlescape
def _unirepl(match):
"Return the unicode string for a decimal number"
@@ -179,6 +185,19 @@ def reduce_zalgo(text,max_zalgo=1):
count+=1
return ''.join(lineout)
+def parse_hex(n, c):
+    "Return the integer value of the two hex digits of n starting at index c."
+    return int(n[c:c+2], 16)
+
+def decode_email(n, c=0):
+    """
+    Decode an obfuscated email string as found in data-cfemail attributes.
+
+    n is a string of hex digit pairs.  The pair at offset c is the XOR
+    key; every following pair is one payload byte XORed with that key.
+    Returns the decoded text with any %XX escapes unquoted and HTML
+    special characters escaped.  Raises ValueError if n has no key
+    pair at offset c or contains non-hex characters.
+    """
+    key = parse_hex(n, c)
+    raw = bytearray(parse_hex(n, i) ^ key for i in range(c + 2, len(n), 2))
+    try:
+        # The payload is a byte sequence, presumably UTF-8 (the
+        # site-side JavaScript decoder appears to treat it that way).
+        # Decoding it as a whole keeps multi-byte characters intact
+        # instead of turning each byte into its own character.
+        o = raw.decode('utf-8')
+    except UnicodeDecodeError:
+        # Not valid UTF-8: fall back to one character per byte, which
+        # is what the previous chr()-per-byte loop produced.
+        o = raw.decode('latin-1')
+    return htmlescape(unquote(o))
+
# entity list from http://code.google.com/p/doctype/wiki/CharacterEntitiesConsistent
entities = { 'á' : 'á',
'Á' : 'Á',