mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-03 20:31:32 +02:00
ffnet 2.7/3.7 with save-cache working.
This commit is contained in:
parent
615b2f54b4
commit
ad1ce3bbb0
5 changed files with 45 additions and 21 deletions
|
|
@@ -28,7 +28,7 @@ from .. import exceptions as exceptions
|
|||
## must import each adapter here.
|
||||
|
||||
from . import adapter_test1
|
||||
# import adapter_fanfictionnet
|
||||
from . import adapter_fanfictionnet
|
||||
# import adapter_fanficcastletvnet
|
||||
# import adapter_fictionalleyorg
|
||||
# import adapter_fictionpresscom
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@@ -15,17 +15,21 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
from datetime import datetime
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import re
|
||||
import urllib2
|
||||
from urllib import unquote_plus
|
||||
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six.moves.urllib.error import HTTPError
|
||||
|
||||
|
||||
from .. import exceptions as exceptions
|
||||
from ..htmlcleanup import stripHTML
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from .base_adapter import BaseSiteAdapter, makeDate
|
||||
|
||||
ffnetgenres=["Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy", "Friendship", "General",
|
||||
"Horror", "Humor", "Hurt-Comfort", "Mystery", "Parody", "Poetry", "Romance", "Sci-Fi",
|
||||
|
|
@@ -100,7 +104,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
data = self._fetchUrl(url)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = self.make_soup(data)
|
||||
except urllib2.HTTPError as e:
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
else:
|
||||
|
|
@@ -135,7 +139,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
and "This request takes too long to process, it is timed out by the server." not in newdata:
|
||||
logger.debug('=======Found newer chapter: %s' % tryurl)
|
||||
soup = self.make_soup(newdata)
|
||||
except urllib2.HTTPError as e:
|
||||
except HTTPError as e:
|
||||
if e.code == 503:
|
||||
raise e
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@@ -26,16 +26,25 @@ import logging
|
|||
import pprint
|
||||
import string
|
||||
import os, sys
|
||||
|
||||
import pickle
|
||||
|
||||
if sys.version_info < (2, 5):
|
||||
print('This program requires Python 2.5 or newer.')
|
||||
sys.exit(1)
|
||||
elif sys.version_info < (3, 0):
|
||||
reload(sys) # Reload restores 'hidden' setdefaultencoding method
|
||||
sys.setdefaultencoding("utf-8")
|
||||
def pickle_load(f):
|
||||
return pickle.load(f)
|
||||
else: # > 3.0
|
||||
def pickle_load(f):
|
||||
return pickle.load(f,encoding="bytes")
|
||||
|
||||
from six.moves import http_cookiejar as cl
|
||||
|
||||
version="2.27.12"
|
||||
os.environ['CURRENT_VERSION_ID']=version
|
||||
|
||||
if sys.version_info < (2, 5):
|
||||
print('This program requires Python 2.5 or newer.')
|
||||
sys.exit(1)
|
||||
|
||||
if sys.version_info >= (2, 7):
|
||||
# suppresses default logger. Logging is setup in fanficfare/__init__.py so it works in calibre, too.
|
||||
|
|
@@ -251,11 +260,11 @@ def main(argv=None,
|
|||
if options.save_cache:
|
||||
try:
|
||||
with open('global_cache','rb') as jin:
|
||||
options.pagecache = pickle.load(jin) # ,encoding="utf-8"
|
||||
options.pagecache = pickle_load(jin)
|
||||
options.cookiejar = cl.LWPCookieJar()
|
||||
options.cookiejar.load('global_cookies')
|
||||
except:
|
||||
print("didn't load global_cache")
|
||||
except Exception as e:
|
||||
print("didn't load global_cache %s"%e)
|
||||
|
||||
if not list_only:
|
||||
if len(urls) < 1:
|
||||
|
|
|
|||
|
|
@@ -19,13 +19,14 @@ import re
|
|||
import exceptions
|
||||
import codecs
|
||||
|
||||
# py2 vs py3 transition
|
||||
import six
|
||||
import six.moves.configparser as ConfigParser
|
||||
from six.moves.configparser import DEFAULTSECT, MissingSectionHeaderError, ParsingError
|
||||
from six.moves import urllib
|
||||
from six.moves.urllib.request import (build_opener, HTTPCookieProcessor)
|
||||
from six.moves.urllib.error import HTTPError
|
||||
from six.moves import http_cookiejar as cl
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six import string_types as basestring
|
||||
|
||||
|
|
@@ -950,6 +951,10 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
## iso-8859-1. Most sites that claim to be iso-8859-1 (and some that
|
||||
## claim to be utf8) are really windows-1252.
|
||||
def _decode(self,data):
|
||||
if not hasattr(data,'decode'):
|
||||
## py3 str() from pickle doesn't have .decode and is
|
||||
## already decoded.
|
||||
return data
|
||||
decode = self.getConfigList('website_encodings',
|
||||
default=["utf8",
|
||||
"Windows-1252",
|
||||
|
|
@@ -976,8 +981,9 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
return data.decode(code,errors='ignore')
|
||||
else:
|
||||
return data.decode(code)
|
||||
except:
|
||||
except Exception as e:
|
||||
logger.debug("code failed:"+code)
|
||||
logger.debug(e)
|
||||
pass
|
||||
logger.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
|
||||
return "".join([x for x in data if ord(x) < 128])
|
||||
|
|
@@ -1027,6 +1033,8 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
|
||||
data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
|
||||
self._progressbar()
|
||||
## postURL saves data to the pagecache *after* _decode() while
|
||||
## fetchRaw saves it *before* _decode()--because raw.
|
||||
self._set_to_pagecache(cachekey,data,url)
|
||||
return data
|
||||
|
||||
|
|
@@ -1093,6 +1101,8 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
float(self.getConfig('connect_timeout',30.0)))
|
||||
self._progressbar()
|
||||
data = opened.read()
|
||||
## postURL saves data to the pagecache *after* _decode() while
|
||||
## fetchRaw saves it *before* _decode()--because raw.
|
||||
self._set_to_pagecache(cachekey,data,opened.url)
|
||||
|
||||
return (data,opened)
|
||||
|
|
@@ -1131,7 +1141,7 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
extrasleep=extrasleep,
|
||||
referer=referer)
|
||||
return (self._decode(data),opened)
|
||||
except urllib.HTTPError as he:
|
||||
except HTTPError as he:
|
||||
excpt=he
|
||||
if he.code in (403,404,410):
|
||||
logger.debug("Caught an exception reading URL: %s Exception %s."%(unicode(safe_url(url)),unicode(he)))
|
||||
|
|
|
|||
|
|
@@ -1,8 +1,9 @@
|
|||
## Borrowed from http://techknack.net/python-urllib2-handlers/
|
||||
|
||||
from six.moves.urllib_request import BaseHandler
|
||||
from six.moves.urllib.request import BaseHandler
|
||||
from six.moves.urllib.response import addinfourl
|
||||
from gzip import GzipFile
|
||||
from six import StringIO
|
||||
from six import BytesIO
|
||||
|
||||
class GZipProcessor(BaseHandler):
|
||||
"""A handler to add gzip capabilities to urllib2 requests
|
||||
|
|
@@ -16,7 +17,7 @@ class GZipProcessor(BaseHandler):
|
|||
#print("Content-Encoding:%s"%resp.headers.get("Content-Encoding"))
|
||||
if resp.headers.get("Content-Encoding") == "gzip":
|
||||
gz = GzipFile(
|
||||
fileobj=StringIO(resp.read()),
|
||||
fileobj=BytesIO(resp.read()),
|
||||
mode="r"
|
||||
)
|
||||
# resp.read = gz.read
|
||||
|
|
@@ -24,7 +25,7 @@ class GZipProcessor(BaseHandler):
|
|||
# resp.readline = gz.readline
|
||||
# resp.next = gz.next
|
||||
old_resp = resp
|
||||
resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||
resp = addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||
resp.msg = old_resp.msg
|
||||
return resp
|
||||
https_response = http_response
|
||||
|
|
|
|||
Loading…
Reference in a new issue