Fixes for: thehexfiles nasty HTML, spaces in img urls, status 'updating BAD'

2025-12-22 16:54:06 +01:00 · 2012-07-04 08:49:04 -05:00 · 2012-07-04 08:49:04 -05:00 · d33896da6a
commit d33896da6a
parent f484ccb436
4 changed files with 15 additions and 9 deletions
--- a/calibre-plugin/__init__.py
+++ b/calibre-plugin/__init__.py
@@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
    description         = 'UI plugin to download FanFiction stories from various sites.'
    supported_platforms = ['windows', 'osx', 'linux']
    author              = 'Jim Miller'
-    version             = (1, 5, 40)
+    version             = (1, 5, 41)
    minimum_calibre_version = (0, 8, 30)

    #: This field defines the GUI plugin class that contains all the code
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@@ -779,11 +779,10 @@ make_firstimage_cover:true
            bad_list = filter(lambda x : x['calibre_id'] and not x['good'], book_list)
            total_bad = len(bad_list)

-            self.gui.status_bar.show_message(_('Adding/Updating %s BAD books.'%total_bad))
-
            if total_bad > 0:
                custom_columns = self.gui.library_view.model().custom_columns
                if prefs['errorcol'] != '' and prefs['errorcol'] in custom_columns:
+                    self.gui.status_bar.show_message(_('Adding/Updating %s BAD books.'%total_bad))
                    label = custom_columns[prefs['errorcol']]['label']
                    print("errorcol label:%s"%label)
                    ## if error column and all bad.
--- a/fanficdownloader/adapters/adapter_thehexfilesnet.py
+++ b/fanficdownloader/adapters/adapter_thehexfilesnet.py
@@ -190,12 +190,19 @@ class TheHexFilesNetAdapter(BaseSiteAdapter):
        logging.debug('Getting chapter text from: %s' % url)

        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
-                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
-        
-        for a in soup.findAll('table'):
-            a.extract()
+                                     selfClosingTags=('br','hr','img')) # otherwise soup eats the br/hr tags.

        if None == soup:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)
    
+        # Ugh.  chapter html doesn't have anything useful around it to demarcate.
+        for a in soup.findAll('table'):
+            a.extract()        
+
+        for a in soup.findAll('head'):
+            a.extract()
+
+        html = soup.find('html')
+        html.name='div'
+            
        return self.utf8FromSoup(url,soup)
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -160,9 +160,9 @@ class BaseSiteAdapter(Configurable):

    def _fetchUrlRaw(self, url, parameters=None):
        if parameters != None:
-            return self.opener.open(url,urllib.urlencode(parameters)).read()
+            return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters)).read()
        else:
-            return self.opener.open(url).read()
+            return self.opener.open(url.replace(' ','%20')).read()
    
    # parameters is a dict()
    def _fetchUrl(self, url, parameters=None):