mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-01-29 18:55:50 +01:00
When trying to detect the encoding of HTML, use no more than the first 10 KB of the input so that detection is not too slow.
This commit is contained in:
parent
a904d5d192
commit
fd2e3db07a
1 changed file with 1 addition and 1 deletion
|
|
@@ -53,7 +53,7 @@ def substitute_entites(raw):
|
|||
def force_encoding(raw, verbose, assume_utf8=False):
|
||||
from calibre.constants import preferred_encoding
|
||||
try:
|
||||
chardet = detect(raw)
|
||||
chardet = detect(raw[:1024*10])
|
||||
except:
|
||||
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
||||
encoding = chardet['encoding']
|
||||
|
|
|
|||
Loading…
Reference in a new issue