ffnet 2.7/3.7 with save-cache working.

This commit is contained in:
Jim Miller 2018-07-30 10:04:38 -05:00
parent 615b2f54b4
commit ad1ce3bbb0
5 changed files with 45 additions and 21 deletions

View file

@@ -28,7 +28,7 @@ from .. import exceptions as exceptions
## must import each adapter here.
from . import adapter_test1
# import adapter_fanfictionnet
from . import adapter_fanfictionnet
# import adapter_fanficcastletvnet
# import adapter_fictionalleyorg
# import adapter_fictionpresscom

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,17 +15,21 @@
# limitations under the License.
#
from __future__ import absolute_import
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from urllib import unquote_plus
# py2 vs py3 transition
from six import text_type as unicode
from six.moves.urllib.error import HTTPError
from .. import exceptions as exceptions
from ..htmlcleanup import stripHTML
from base_adapter import BaseSiteAdapter, makeDate
from .base_adapter import BaseSiteAdapter, makeDate
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy", "Friendship", "General",
"Horror", "Humor", "Hurt-Comfort", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi",
@@ -100,7 +104,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
data = self._fetchUrl(url)
#logger.debug("\n===================\n%s\n===================\n"%data)
soup = self.make_soup(data)
except urllib2.HTTPError as e:
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(url)
else:
@@ -135,7 +139,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
and "This request takes too long to process, it is timed out by the server." not in newdata:
logger.debug('=======Found newer chapter: %s' % tryurl)
soup = self.make_soup(newdata)
except urllib2.HTTPError as e:
except HTTPError as e:
if e.code == 503:
raise e
except Exception as e:

View file

@@ -26,16 +26,25 @@ import logging
import pprint
import string
import os, sys
import pickle
if sys.version_info < (2, 5):
print('This program requires Python 2.5 or newer.')
sys.exit(1)
elif sys.version_info < (3, 0):
reload(sys) # Reload restores 'hidden' setdefaultencoding method
sys.setdefaultencoding("utf-8")
def pickle_load(f):
return pickle.load(f)
else: # > 3.0
def pickle_load(f):
return pickle.load(f,encoding="bytes")
from six.moves import http_cookiejar as cl
version="2.27.12"
os.environ['CURRENT_VERSION_ID']=version
if sys.version_info < (2, 5):
print('This program requires Python 2.5 or newer.')
sys.exit(1)
if sys.version_info >= (2, 7):
# suppresses default logger. Logging is setup in fanficfare/__init__.py so it works in calibre, too.
@@ -251,11 +260,11 @@ def main(argv=None,
if options.save_cache:
try:
with open('global_cache','rb') as jin:
options.pagecache = pickle.load(jin) # ,encoding="utf-8"
options.pagecache = pickle_load(jin)
options.cookiejar = cl.LWPCookieJar()
options.cookiejar.load('global_cookies')
except:
print("didn't load global_cache")
except Exception as e:
print("didn't load global_cache %s"%e)
if not list_only:
if len(urls) < 1:

View file

@@ -19,13 +19,14 @@ import re
import exceptions
import codecs
# py2 vs py3 transition
import six
import six.moves.configparser as ConfigParser
from six.moves.configparser import DEFAULTSECT, MissingSectionHeaderError, ParsingError
from six.moves import urllib
from six.moves.urllib.request import (build_opener, HTTPCookieProcessor)
from six.moves.urllib.error import HTTPError
from six.moves import http_cookiejar as cl
# py2 vs py3 transition
from six import text_type as unicode
from six import string_types as basestring
@@ -950,6 +951,10 @@ class Configuration(ConfigParser.SafeConfigParser):
## iso-8859-1. Most sites that claim to be iso-8859-1 (and some that
## claim to be utf8) are really windows-1252.
def _decode(self,data):
if not hasattr(data,'decode'):
## py3 str() from pickle doesn't have .decode and is
## already decoded.
return data
decode = self.getConfigList('website_encodings',
default=["utf8",
"Windows-1252",
@@ -976,8 +981,9 @@ class Configuration(ConfigParser.SafeConfigParser):
return data.decode(code,errors='ignore')
else:
return data.decode(code)
except:
except Exception as e:
logger.debug("code failed:"+code)
logger.debug(e)
pass
logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
@@ -1027,6 +1033,8 @@ class Configuration(ConfigParser.SafeConfigParser):
data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
self._progressbar()
## postURL saves data to the pagecache *after* _decode() while
## fetchRaw saves it *before* _decode()--because raw.
self._set_to_pagecache(cachekey,data,url)
return data
@@ -1093,6 +1101,8 @@ class Configuration(ConfigParser.SafeConfigParser):
float(self.getConfig('connect_timeout',30.0)))
self._progressbar()
data = opened.read()
## postURL saves data to the pagecache *after* _decode() while
## fetchRaw saves it *before* _decode()--because raw.
self._set_to_pagecache(cachekey,data,opened.url)
return (data,opened)
@@ -1131,7 +1141,7 @@ class Configuration(ConfigParser.SafeConfigParser):
extrasleep=extrasleep,
referer=referer)
return (self._decode(data),opened)
except urllib.HTTPError as he:
except HTTPError as he:
excpt=he
if he.code in (403,404,410):
logger.debug("Caught an exception reading URL: %s Exception %s."%(unicode(safe_url(url)),unicode(he)))

View file

@@ -1,8 +1,9 @@
## Borrowed from http://techknack.net/python-urllib2-handlers/
from six.moves.urllib_request import BaseHandler
from six.moves.urllib.request import BaseHandler
from six.moves.urllib.response import addinfourl
from gzip import GzipFile
from six import StringIO
from six import BytesIO
class GZipProcessor(BaseHandler):
"""A handler to add gzip capabilities to urllib2 requests
@@ -16,7 +17,7 @@ class GZipProcessor(BaseHandler):
#print("Content-Encoding:%s"%resp.headers.get("Content-Encoding"))
if resp.headers.get("Content-Encoding") == "gzip":
gz = GzipFile(
fileobj=StringIO(resp.read()),
fileobj=BytesIO(resp.read()),
mode="r"
)
# resp.read = gz.read
@@ -24,7 +25,7 @@ class GZipProcessor(BaseHandler):
# resp.readline = gz.readline
# resp.next = gz.next
old_resp = resp
resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp = addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
return resp
https_response = http_response