mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Yet more fixes for poor parsing of numeric entities--this time, my poor parsing.
This commit is contained in:
parent
3cb92f48cc
commit
0a2cff3469
3 changed files with 16 additions and 5 deletions
|
|
@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
|
|||
description = 'UI plugin to download FanFiction stories from various sites.'
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
author = 'Jim Miller'
|
||||
version = (1, 5, 15)
|
||||
version = (1, 5, 16)
|
||||
minimum_calibre_version = (0, 8, 30)
|
||||
|
||||
#: This field defines the GUI plugin class that contains all the code
|
||||
|
|
|
|||
|
|
@ -184,6 +184,7 @@ br breaks<br><br>
|
|||
Puella Magi Madoka Magica/魔法少女まどか★マギカ
|
||||
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownLoaderPluginWithReadingList" title="Tilt-a-Whirl by Jim & Sarah, on Flickr"><img src="http://i.imgur.com/bo8eD.png"></a><br/>
|
||||
br breaks<br><br>
|
||||
Don't—e;ver—d;o—that—a;gain, 法 é
|
||||
<hr>
|
||||
horizontal rules
|
||||
<hr size=1 noshade>
|
||||
|
|
|
|||
|
|
@ -19,12 +19,21 @@ import re
|
|||
|
||||
def _unirepl(match):
|
||||
"Return the unicode string for a decimal number"
|
||||
if match.group(1)=='x':
|
||||
if match.group(1).startswith('x'):
|
||||
radix=16
|
||||
s = match.group(1)[1:]
|
||||
else:
|
||||
radix=10
|
||||
value = int(match.group(2), radix)
|
||||
return "%s%s"%(unichr(value),match.group(3))
|
||||
s = match.group(1)
|
||||
try:
|
||||
value = int(s, radix)
|
||||
retval = "%s%s"%(unichr(value),match.group(2))
|
||||
except:
|
||||
# This way, at least if there are more entities out there
|
||||
# that fail, it doesn't blow up the entire download.
|
||||
print "Numeric entity translation failed, skipping: &#x%s%s"%(match.group(1),match.group(2))
|
||||
retval = ""
|
||||
return retval
|
||||
|
||||
def _replaceNumberEntities(data):
|
||||
# The same brokenish entity parsing in SGMLParser that inserts ';'
|
||||
|
|
@ -33,7 +42,8 @@ def _replaceNumberEntities(data):
|
|||
# "Don't—ever—do—that—again," becomes
|
||||
# "Don't—e;ver—d;o—that—a;gain,"
|
||||
# Also need to allow for 5 digit decimal entities 法
|
||||
p = re.compile(r'&#(x?)([0-9]{,5}|[0-9a-fA-F]{,4})([0-9a-fA-F]*?);')
|
||||
# Last expression didn't allow for 2 digit hex correctly: é
|
||||
p = re.compile(r'&#(x[0-9a-fA-F]{,4}|[0-9]{,5})([0-9a-fA-F]*?);')
|
||||
return p.sub(_unirepl, data)
|
||||
|
||||
def _replaceNotEntities(data):
|
||||
|
|
|
|||
Loading…
Reference in a new issue