diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 74de4a74..cdd94c5a 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -123,6 +123,10 @@ include_tocpage: true ## it has +90% confidence. 'auto' is not reliable. #website_encodings: utf8, Windows-1252, iso-8859-1 +## When using 'auto' in website_encodings, you can tweak the +## confidence required to use the encoding chardet detected. +#chardet_confidence_limit:0.9 + ## entries to make epub subjects and calibre tags ## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" include_subject_tags: extratags, genre, category, characters, ships, status diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index 41593476..c8d7a250 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -354,6 +354,7 @@ def get_valid_keywords(): 'include_subject_tags', 'include_titlepage', 'include_tocpage', + 'chardet_confidence_limit', 'is_adult', 'join_string_authorHTML', 'keep_style_attr', @@ -912,9 +913,11 @@ class Configuration(ConfigParser.SafeConfigParser): continue detected = chardet.detect(data) #print detected - if detected['confidence'] > 0.9: + if detected['confidence'] > float(self.getConfig("chardet_confidence_limit",0.9)): + logger.debug("using chardet detected encoding:%s(%s)"%(detected['encoding'],detected['confidence'])) code=detected['encoding'] else: + logger.debug("chardet confidence too low:%s(%s)"%(detected['encoding'],detected['confidence'])) continue return data.decode(code) except: diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 1548e935..9a64d327 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -123,6 +123,10 @@ include_tocpage: true ## it has +90% confidence. 'auto' is not reliable. 
#website_encodings: utf8, Windows-1252, iso-8859-1 +## When using 'auto' in website_encodings, you can tweak the +## confidence required to use the encoding chardet detected. +#chardet_confidence_limit:0.9 + ## python string Template, string with ${title}, ${author} etc, same as titlepage_entries ## Can include directories. #output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}