ffnet 2.7/3.7 with save-cache working.

This commit is contained in:
Jim Miller 2018-07-30 10:04:38 -05:00
parent 615b2f54b4
commit ad1ce3bbb0
5 changed files with 45 additions and 21 deletions

View file

@@ -28,7 +28,7 @@ from .. import exceptions as exceptions
## must import each adapter here.
from . import adapter_test1
# import adapter_fanfictionnet
from . import adapter_fanfictionnet
# import adapter_fanficcastletvnet
# import adapter_fictionalleyorg
# import adapter_fictionpresscom

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,17 +15,21 @@
# limitations under the License.
#
from __future__ import absolute_import
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
from urllib import unquote_plus
# py2 vs py3 transition
from six import text_type as unicode
from six.moves.urllib.error import HTTPError
from .. import exceptions as exceptions
from ..htmlcleanup import stripHTML
from base_adapter import BaseSiteAdapter, makeDate
from .base_adapter import BaseSiteAdapter, makeDate
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy", "Friendship", "General",
"Horror", "Humor", "Hurt-Comfort", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi",
@@ -100,7 +104,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
data = self._fetchUrl(url)
#logger.debug("\n===================\n%s\n===================\n"%data)
soup = self.make_soup(data)
except urllib2.HTTPError as e:
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(url)
else:
@@ -135,7 +139,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
and "This request takes too long to process, it is timed out by the server." not in newdata:
logger.debug('=======Found newer chapter: %s' % tryurl)
soup = self.make_soup(newdata)
except urllib2.HTTPError as e:
except HTTPError as e:
if e.code == 503:
raise e
except Exception as e:

View file

@@ -26,16 +26,25 @@ import logging
import pprint
import string
import os, sys
import pickle
if sys.version_info < (2, 5):
print('This program requires Python 2.5 or newer.')
sys.exit(1)
elif sys.version_info < (3, 0):
reload(sys) # Reload restores 'hidden' setdefaultencoding method
sys.setdefaultencoding("utf-8")
def pickle_load(f):
return pickle.load(f)
else: # > 3.0
def pickle_load(f):
return pickle.load(f,encoding="bytes")
from six.moves import http_cookiejar as cl
version="2.27.12"
os.environ['CURRENT_VERSION_ID']=version
if sys.version_info < (2, 5):
print('This program requires Python 2.5 or newer.')
sys.exit(1)
if sys.version_info >= (2, 7):
# suppresses default logger. Logging is setup in fanficfare/__init__.py so it works in calibre, too.
@@ -251,11 +260,11 @@ def main(argv=None,
if options.save_cache:
try:
with open('global_cache','rb') as jin:
options.pagecache = pickle.load(jin) # ,encoding="utf-8"
options.pagecache = pickle_load(jin)
options.cookiejar = cl.LWPCookieJar()
options.cookiejar.load('global_cookies')
except:
print("didn't load global_cache")
except Exception as e:
print("didn't load global_cache %s"%e)
if not list_only:
if len(urls) < 1:

View file

@@ -19,13 +19,14 @@ import re
import exceptions
import codecs
# py2 vs py3 transition
import six
import six.moves.configparser as ConfigParser
from six.moves.configparser import DEFAULTSECT, MissingSectionHeaderError, ParsingError
from six.moves import urllib
from six.moves.urllib.request import (build_opener, HTTPCookieProcessor)
from six.moves.urllib.error import HTTPError
from six.moves import http_cookiejar as cl
# py2 vs py3 transition
from six import text_type as unicode
from six import string_types as basestring
@@ -950,6 +951,10 @@ class Configuration(ConfigParser.SafeConfigParser):
## iso-8859-1. Most sites that claim to be iso-8859-1 (and some that
## claim to be utf8) are really windows-1252.
def _decode(self,data):
if not hasattr(data,'decode'):
## py3 str() from pickle doesn't have .decode and is
## already decoded.
return data
decode = self.getConfigList('website_encodings',
default=["utf8",
"Windows-1252",
@@ -976,8 +981,9 @@ class Configuration(ConfigParser.SafeConfigParser):
return data.decode(code,errors='ignore')
else:
return data.decode(code)
except:
except Exception as e:
logger.debug("code failed:"+code)
logger.debug(e)
pass
logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
@@ -1027,6 +1033,8 @@ class Configuration(ConfigParser.SafeConfigParser):
data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
self._progressbar()
## postURL saves data to the pagecache *after* _decode() while
## fetchRaw saves it *before* _decode()--because raw.
self._set_to_pagecache(cachekey,data,url)
return data
@@ -1093,6 +1101,8 @@ class Configuration(ConfigParser.SafeConfigParser):
float(self.getConfig('connect_timeout',30.0)))
self._progressbar()
data = opened.read()
## postURL saves data to the pagecache *after* _decode() while
## fetchRaw saves it *before* _decode()--because raw.
self._set_to_pagecache(cachekey,data,opened.url)
return (data,opened)
@@ -1131,7 +1141,7 @@ class Configuration(ConfigParser.SafeConfigParser):
extrasleep=extrasleep,
referer=referer)
return (self._decode(data),opened)
except urllib.HTTPError as he:
except HTTPError as he:
excpt=he
if he.code in (403,404,410):
logger.debug("Caught an exception reading URL: %s Exception %s."%(unicode(safe_url(url)),unicode(he)))

View file

@@ -1,8 +1,9 @@
## Borrowed from http://techknack.net/python-urllib2-handlers/
from six.moves.urllib_request import BaseHandler
from six.moves.urllib.request import BaseHandler
from six.moves.urllib.response import addinfourl
from gzip import GzipFile
from six import StringIO
from six import BytesIO
class GZipProcessor(BaseHandler):
"""A handler to add gzip capabilities to urllib2 requests
@@ -16,7 +17,7 @@ class GZipProcessor(BaseHandler):
#print("Content-Encoding:%s"%resp.headers.get("Content-Encoding"))
if resp.headers.get("Content-Encoding") == "gzip":
gz = GzipFile(
fileobj=StringIO(resp.read()),
fileobj=BytesIO(resp.read()),
mode="r"
)
# resp.read = gz.read
@@ -24,7 +25,7 @@ class GZipProcessor(BaseHandler):
# resp.readline = gz.readline
# resp.next = gz.next
old_resp = resp
resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp = addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
return resp
https_response = http_response