Fixes for: thehexfiles nasty HTML, spaces in img urls, status 'updating BAD'

This commit is contained in:
Jim Miller 2012-07-04 08:49:04 -05:00
parent f484ccb436
commit d33896da6a
4 changed files with 15 additions and 9 deletions

View file

@@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 5, 40)
version = (1, 5, 41)
minimum_calibre_version = (0, 8, 30)
#: This field defines the GUI plugin class that contains all the code

View file

@@ -779,11 +779,10 @@ make_firstimage_cover:true
bad_list = filter(lambda x : x['calibre_id'] and not x['good'], book_list)
total_bad = len(bad_list)
self.gui.status_bar.show_message(_('Adding/Updating %s BAD books.'%total_bad))
if total_bad > 0:
custom_columns = self.gui.library_view.model().custom_columns
if prefs['errorcol'] != '' and prefs['errorcol'] in custom_columns:
self.gui.status_bar.show_message(_('Adding/Updating %s BAD books.'%total_bad))
label = custom_columns[prefs['errorcol']]['label']
print("errorcol label:%s"%label)
## if error column and all bad.

View file

@@ -190,12 +190,19 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
logging.debug('Getting chapter text from: %s' % url)
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
for a in soup.findAll('table'):
a.extract()
selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.
if None == soup:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
# Ugh. chapter html doesn't have anything useful around it to demarcate.
for a in soup.findAll('table'):
a.extract()
for a in soup.findAll('head'):
a.extract()
html = soup.find('html')
html.name='div'
return self.utf8FromSoup(url,soup)

View file

@@ -160,9 +160,9 @@ class BaseSiteAdapter(Configurable):
def _fetchUrlRaw(self, url, parameters=None):
if parameters != None:
return self.opener.open(url,urllib.urlencode(parameters)).read()
return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters)).read()
else:
return self.opener.open(url).read()
return self.opener.open(url.replace(' ','%20')).read()
# parameters is a dict()
def _fetchUrl(self, url, parameters=None):