From b231abc03636cd81b4572aa93f457ea4793d4eb4 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Wed, 25 Jul 2018 15:07:06 -0500
Subject: [PATCH] Remove accidental file.

---
 fanficfare/configurable.py-filelist1 | 1149 --------------------------
 1 file changed, 1149 deletions(-)
 delete mode 100644 fanficfare/configurable.py-filelist1

diff --git a/fanficfare/configurable.py-filelist1 b/fanficfare/configurable.py-filelist1
deleted file mode 100644
index e35ed44e..00000000
--- a/fanficfare/configurable.py-filelist1
+++ /dev/null
@@ -1,1149 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015 Fanficdownloader team, 2017 FanFicFare team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import ConfigParser, re
-import exceptions
-import codecs
-from ConfigParser import DEFAULTSECT, MissingSectionHeaderError, ParsingError
-
-import time
-import logging
-import urllib
-import urllib2 as u2
-import urlparse as up
-import cookielib as cl
-
-logger = logging.getLogger(__name__)
-
-try:
-    from google.appengine.api import apiproxy_stub_map
-    def urlfetch_timeout_hook(service, call, request, response):
-        if call != 'Fetch':
-            return
-        # Make the default deadline 10 seconds instead of 5.
-        if not request.has_deadline():
-            request.set_deadline(10.0)
-
-    apiproxy_stub_map.apiproxy.GetPreCallHooks().Append(
-        'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch')
-    logger.info("Hook to make default deadline 10.0 installed.")
-except:
-    pass
-    #logger.info("Hook to make default deadline 10.0 NOT installed--not using appengine")
-
-try:
-    import chardet
-except ImportError:
-    chardet = None
-
-from gziphttp import GZipProcessor
-
-# All of the writers(epub,html,txt) and adapters(ffnet,twlt,etc)
-# inherit from Configurable. The config file(s) use ini format:
-# [sections] with key:value settings.
-#
-# [defaults]
-# titlepage_entries: category,genre, status
-# [www.whofic.com]
-# titlepage_entries: category,genre, status,dateUpdated,rating
-# [epub]
-# titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
-# [www.whofic.com:epub]
-# titlepage_entries: category,genre, status,datePublished
-# [overrides]
-# titlepage_entries: category
-
-import adapters
-
-def re_compile(regex,line):
-    try:
-        return re.compile(regex,re.DOTALL)
-    except Exception, e:
-        raise exceptions.RegularExpresssionFailed(e,regex,line)
-
-# fall back labels.
-titleLabels = {
-    'category':'Category',
-    'genre':'Genre',
-    'language':'Language',
-    'status':'Status',
-    'series':'Series',
-    'characters':'Characters',
-    'ships':'Relationships',
-    'datePublished':'Published',
-    'dateUpdated':'Updated',
-    'dateCreated':'Packaged',
-    'rating':'Rating',
-    'warnings':'Warnings',
-    'numChapters':'Chapters',
-    'numWords':'Words',
-    'words_added':'Words Added', # logpage only
-    'site':'Site',
-    'storyId':'Story ID',
-    'authorId':'Author ID',
-    'extratags':'Extra Tags',
-    'title':'Title',
-    'storyUrl':'Story URL',
-    'description':'Summary',
-    'author':'Author',
-    'authorUrl':'Author URL',
-    'formatname':'File Format',
-    'formatext':'File Extension',
-    'siteabbrev':'Site Abbrev',
-    'version':'Downloader Version'
-    }
-
-formatsections = ['html','txt','epub','mobi']
-othersections = ['defaults','overrides']
-
-def get_valid_sections():
-    sites = adapters.getConfigSections()
-    sitesections = list(othersections)
-    for section in sites:
-        sitesections.append(section)
-        # also allows [www.base_efiction] and [www.base_xenforoforum]. Not
-        # likely to matter.
-        if section.startswith('www.'):
-            # add w/o www if it has www
-            sitesections.append(section[4:])
-        else:
-            # add w/ www if it doesn't have www
-            sitesections.append('www.%s'%section)
-
-    allowedsections = []
-    allowedsections.extend(formatsections)
-
-    for section in sitesections:
-        allowedsections.append(section)
-        for f in formatsections:
-            allowedsections.append('%s:%s'%(section,f))
-    return allowedsections
-
-def get_valid_list_entries():
-    return list(['category',
-                 'genre',
-                 'characters',
-                 'ships',
-                 'warnings',
-                 'extratags',
-                 'author',
-                 'authorId',
-                 'authorUrl',
-                 'lastupdate',
-                 ])
-
-boollist=['true','false']
-base_xenforo_list=['base_xenforoforum',
-                   'forums.spacebattles.com',
-                   'forums.sufficientvelocity.com',
-                   'questionablequesting.com',
-                   ]
-def get_valid_set_options():
-    '''
-    dict() of names of boolean options, as tuples of
-    (valid sites, valid formats, valid values), where None==all.
-
-    This is to further restrict keywords to certain sections and/or
-    values. get_valid_keywords() below is the list of allowed
-    keywords. Any keyword listed here must also be listed there.
-
-    This is what's used by the personal.ini check in the plugin,
-    which stops the save and points out possible errors in keyword
-    *values*. It doesn't flag 'bad' keywords. Note that it's
-    separate from color highlighting and most keywords need to be
-    added to both.
-    '''
-
-    valdict = {'collect_series':(None,None,boollist),
-               'include_titlepage':(None,None,boollist),
-               'include_tocpage':(None,None,boollist),
-               'is_adult':(None,None,boollist),
-               'keep_style_attr':(None,None,boollist),
-               'keep_title_attr':(None,None,boollist),
-               'make_firstimage_cover':(None,None,boollist),
-               'never_make_cover':(None,None,boollist),
-               'nook_img_fix':(None,None,boollist),
-               'replace_br_with_p':(None,None,boollist),
-               'replace_hr':(None,None,boollist),
-               'sort_ships':(None,None,boollist),
-               'strip_chapter_numbers':(None,None,boollist),
-               'mark_new_chapters':(None,None,boollist),
-               'titlepage_use_table':(None,None,boollist),
-
-               'use_ssl_unverified_context':(None,None,boollist),
-               'continue_on_chapter_error':(None,None,boollist),
-
-               'add_chapter_numbers':(None,None,boollist+['toconly']),
-
-               'check_next_chapter':(['fanfiction.net'],None,boollist),
-               'tweak_fg_sleep':(['fanfiction.net'],None,boollist),
-               'skip_author_cover':(['fanfiction.net'],None,boollist),
-
-               'fix_fimf_blockquotes':(['fimfiction.net'],None,boollist),
-               'fail_on_password':(['fimfiction.net'],None,boollist),
-               'do_update_hook':(['fimfiction.net',
-                                  'archiveofourown.org'],None,boollist),
-
-               'force_login':(['phoenixsong.net'],None,boollist),
-               'non_breaking_spaces':(['fictionmania.tv'],None,boollist),
-               'universe_as_series':(['storiesonline.net','finestories.com'],None,boollist),
-               'strip_text_links':(['bloodshedverse.com','asexstories.com'],None,boollist),
-               'centeredcat_to_characters':(['tthfanfic.org'],None,boollist),
-               'pairingcat_to_characters_ships':(['tthfanfic.org'],None,boollist),
-               'romancecat_to_characters_ships':(['tthfanfic.org'],None,boollist),
-
-               'use_meta_keywords':(['literotica.com'],None,boollist),
-               'clean_chapter_titles':(['literotica.com'],None,boollist),
-               'description_in_chapter':(['literotica.com'],None,boollist),
-
-               'inject_chapter_title':(['asianfanfics.com'],None,boollist),
-
-               # eFiction Base adapters allow bulk_load
-               # kept forgetting to add them, so now it's automatic.
-               'bulk_load':(adapters.get_bulk_load_sites(),
-                            None,boollist),
-
-               'include_logpage':(None,['epub'],boollist+['smart']),
-               'logpage_at_end':(None,['epub'],boollist),
-
-               'windows_eol':(None,['txt'],boollist),
-
-               'include_images':(None,['epub','html'],boollist),
-               'grayscale_images':(None,['epub','html'],boollist),
-               'no_image_processing':(None,['epub','html'],boollist),
-               'normalize_text_links':(None,['epub','html'],boollist),
-               'internalize_text_links':(None,['epub','html'],boollist),
-
-               'capitalize_forumtags':(base_xenforo_list,None,boollist),
-               'minimum_threadmarks':(base_xenforo_list,None,None),
-               'first_post_title':(base_xenforo_list,None,None),
-               'always_include_first_post':(base_xenforo_list,None,boollist),
-               'always_reload_first_chapter':(base_xenforo_list,None,boollist),
-               'always_use_forumtags':(base_xenforo_list,None,boollist),
-               'use_reader_mode':(base_xenforo_list,None,boollist),
-               'author_avatar_cover':(base_xenforo_list,None,boollist),
-               'remove_spoilers':(base_xenforo_list,None,boollist),
-               'legend_spoilers':(base_xenforo_list,None,boollist),
-               }
-
-    return dict(valdict)
-
-def get_valid_scalar_entries():
-    return list(['series',
-                 'seriesUrl',
-                 'language',
-                 'status',
-                 'datePublished',
-                 'dateUpdated',
-                 'dateCreated',
-                 'rating',
-                 'numChapters',
-                 'numWords',
-                 'words_added', # logpage only.
-                 'site',
-                 'storyId',
-                 'title',
-                 'storyUrl',
-                 'description',
-                 'formatname',
-                 'formatext',
-                 'siteabbrev',
-                 'version',
-                 # internal stuff.
-                 'authorHTML',
-                 'seriesHTML',
-                 'langcode',
-                 'output_css',
-                 'cover_image',
-                 ])
-
-def get_valid_entries():
-    return get_valid_list_entries() + get_valid_scalar_entries()
-
-# *known* keywords -- or rather regexps for them.
-def get_valid_keywords():
-    '''
-    Among other things, this list is used by the color highlighting in
-    personal.ini editing in the plugin. Note that it's separate from
-    value checking and most keywords need to be added to both.
-    '''
-    return list(['(in|ex)clude_metadata_(pre|post)',
-                 'add_chapter_numbers',
-                 'add_genre_when_multi_category',
-                 'adult_ratings',
-                 'allow_unsafe_filename',
-                 'always_overwrite',
-                 'anthology_tags',
-                 'anthology_title_pattern',
-                 'background_color',
-                 'bulk_load',
-                 'chapter_end',
-                 'chapter_start',
-                 'chapter_title_strip_pattern',
-                 'chapter_title_def_pattern',
-                 'chapter_title_add_pattern',
-                 'chapter_title_new_pattern',
-                 'chapter_title_addnew_pattern',
-                 'title_chapter_range_pattern',
-                 'mark_new_chapters',
-                 'check_next_chapter',
-                 'skip_author_cover',
-                 'collect_series',
-                 'connect_timeout',
-                 'convert_images_to',
-                 'cover_content',
-                 'cover_exclusion_regexp',
-                 'custom_columns_settings',
-                 'dateCreated_format',
-                 'datePublished_format',
-                 'dateUpdated_format',
-                 'default_cover_image',
-                 'description_limit',
-                 'do_update_hook',
-                 'exclude_notes',
-                 'exclude_editor_signature',
-                 'extra_logpage_entries',
-                 'extra_subject_tags',
-                 'extra_titlepage_entries',
-                 'extra_valid_entries',
-                 'extratags',
-                 'extracategories',
-                 'extragenres',
-                 'extracharacters',
-                 'extraships',
-                 'extrawarnings',
-                 'fail_on_password',
-                 'file_end',
-                 'file_start',
-                 'fileformat',
-                 'find_chapters',
-                 'fix_fimf_blockquotes',
-                 'force_login',
-                 'generate_cover_settings',
-                 'grayscale_images',
-                 'image_max_size',
-                 'include_images',
-                 'include_logpage',
-                 'logpage_at_end',
-                 'include_subject_tags',
-                 'include_titlepage',
-                 'include_tocpage',
-                 'is_adult',
-                 'join_string_authorHTML',
-                 'keep_style_attr',
-                 'keep_title_attr',
-                 'keep_html_attrs',
-                 'replace_tags_with_spans',
-                 'keep_summary_html',
-                 'logpage_end',
-                 'logpage_entries',
-                 'logpage_entry',
-                 'logpage_start',
-                 'logpage_update_end',
-                 'logpage_update_start',
-                 'make_directories',
-                 'make_firstimage_cover',
-                 'make_linkhtml_entries',
-                 'max_fg_sleep',
-                 'max_fg_sleep_at_downloads',
-                 'min_fg_sleep',
-                 'never_make_cover',
-                 'no_image_processing',
-                 'non_breaking_spaces',
-                 'nook_img_fix',
-                 'output_css',
-                 'output_filename',
-                 'output_filename_safepattern',
-                 'password',
-                 'post_process_cmd',
-                 'rating_titles',
-                 'remove_transparency',
-                 'replace_br_with_p',
-                 'replace_hr',
-                 'replace_metadata',
-                 'slow_down_sleep_time',
-                 'sort_ships',
-                 'sort_ships_splits',
-                 'strip_chapter_numbers',
-                 'strip_chapter_numeral',
-                 'strip_text_links',
-                 'centeredcat_to_characters',
-                 'pairingcat_to_characters_ships',
-                 'romancecat_to_characters_ships',
-                 'use_meta_keywords',
-                 'clean_chapter_titles',
-                 'description_in_chapter',
-                 'inject_chapter_title',
-                 'titlepage_end',
-                 'titlepage_entries',
-                 'titlepage_entry',
-                 'titlepage_no_title_entry',
-                 'titlepage_start',
-                 'titlepage_use_table',
-                 'titlepage_wide_entry',
-                 'tocpage_end',
-                 'tocpage_entry',
-                 'tocpage_start',
-                 'tweak_fg_sleep',
-                 'universe_as_series',
-                 'use_ssl_unverified_context',
-                 'user_agent',
-                 'username',
-                 'website_encodings',
-                 'wide_titlepage_entries',
-                 'windows_eol',
-                 'wrap_width',
-                 'zip_filename',
-                 'zip_output',
-                 'capitalize_forumtags',
-                 'continue_on_chapter_error',
-                 'chapter_title_error_mark',
-                 'minimum_threadmarks',
-                 'first_post_title',
-                 'always_include_first_post',
-                 'always_reload_first_chapter',
-                 'always_use_forumtags',
-                 'use_reader_mode',
-                 'author_avatar_cover',
-                 'reader_posts_per_page',
-                 'remove_spoilers',
-                 'legend_spoilers',
-                 'normalize_text_links',
-                 'internalize_text_links',
-                 ])
-
-# *known* entry keywords -- or rather regexps for them.
-def get_valid_entry_keywords():
-    return list(['%s_(label|format)',
-                 '(default_value|include_in|join_string|keep_in_order)_%s',])
-
-# Moved here for test_config.
-def make_generate_cover_settings(param):
-    vlist = []
-    for line in param.splitlines():
-        if "=>" in line:
-            try:
-                (template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") )
-                re_compile(regexp,line)
-                vlist.append((template,regexp,setting))
-            except Exception, e:
-                raise exceptions.PersonalIniFailed(e,line,param)
-
-    return vlist
-
-
-class Configuration(ConfigParser.SafeConfigParser):
-
-    def __init__(self, sections, fileform, lightweight=False):
-        site = sections[-1] # last section is site DN.
-        ConfigParser.SafeConfigParser.__init__(self)
-
-        self.lightweight = lightweight
-        self.use_pagecache = False # default to false for old adapters.
-
-        self.linenos=dict() # key by section or section,key -> lineno
-
-        ## [injected] section has even less priority than [defaults]
-        self.sectionslist = ['defaults','injected']
-
-        ## add other sections (not including site DN) after defaults,
-        ## but before site-specific.
-        for section in sections[:-1]:
-            self.addConfigSection(section)
-
-        if site.startswith("www."):
-            sitewith = site
-            sitewithout = site.replace("www.","")
-        else:
-            sitewith = "www."+site
-            sitewithout = site
-
-        self.addConfigSection(sitewith)
-        self.addConfigSection(sitewithout)
-
-        if fileform:
-            self.addConfigSection(fileform)
-            ## add other sections:fileform (not including site DN)
-            ## after fileform, but before site-specific:fileform.
-            for section in sections[:-1]:
-                self.addConfigSection(section+":"+fileform)
-            self.addConfigSection(sitewith+":"+fileform)
-            self.addConfigSection(sitewithout+":"+fileform)
-        self.addConfigSection("overrides")
-
-        self.listTypeEntries = get_valid_list_entries()
-
-        self.validEntries = get_valid_entries()
-
-        self.url_config_set = False
-
-        self.override_sleep = None
-        self.cookiejar = self.get_empty_cookiejar()
-        self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
-
-        self.pagecache = self.get_empty_pagecache()
-
-
-    def addUrlConfigSection(self,url):
-        if not self.lightweight: # don't need when just checking for normalized URL.
-            # replace if already set once.
-            if self.url_config_set:
-                self.sectionslist[self.sectionslist.index('overrides')+1]=url
-            else:
-                self.addConfigSection(url,'overrides')
-            self.url_config_set=True
-
-    def addConfigSection(self,section,before=None):
-        if section not in self.sectionslist: # don't add if already present.
-            if before is None:
-                self.sectionslist.insert(0,section)
-            else:
-                ## because sectionslist is hi-pri first, lo-pri last,
-                ## 'before' means after in the list.
-                self.sectionslist.insert(self.sectionslist.index(before)+1,section)
-
-    def isListType(self,key):
-        return key in self.listTypeEntries or self.hasConfig("include_in_"+key)
-
-    def isValidMetaEntry(self, key):
-        return key in self.getValidMetaList()
-
-    def getValidMetaList(self):
-        return self.validEntries + self.getConfigList("extra_valid_entries")
-
-    # used by adapters & writers, non-convention naming style
-    def hasConfig(self, key):
-        return self.has_config(self.sectionslist, key)
-
-    def has_config(self, sections, key):
-        for section in sections:
-            try:
-                self.get(section,key)
-                #print("found %s in section [%s]"%(key,section))
-                return True
-            except:
-                try:
-                    self.get(section,key+"_filelist")
-                    #print("found %s_filelist in section [%s]"%(key,section))
-                    return True
-                except:
-                    try:
-                        self.get(section,"add_to_"+key)
-                        #print("found add_to_%s in section [%s]"%(key,section))
-                        return True
-                    except:
-                        pass
-
-        return False
-
-    # used by adapters & writers, non-convention naming style
-    def getConfig(self, key, default=""):
-        return self.get_config(self.sectionslist,key,default)
-
-    def get_config(self, sections, key, default=""):
-        val = default
-        for section in sections:
-            try:
-                val = self.get(section,key+"_filelist")
-                vlist = re.split(r'(?<!\\),',val)
-                    # cliches=>#acolumn
-                    # themes=>#bcolumn,a
-                    # timeline=>#ccolumn,n
-                    # "FanFiction"=>#collection
-
-                    if not allow_all_section:
-                        def make_sections(x):
-                            return '['+'], ['.join(x)+']'
-                        if keyword in valdict:
-                            (valsites,valformats,vals)=valdict[keyword]
-                            if valsites != None and sitename != None and sitename not in valsites:
-                                errors.append((self.get_lineno(section,keyword),"%s not valid in section [%s] -- only valid in %s sections."%(keyword,section,make_sections(valsites))))
-                            if valformats != None and formatname != None and formatname not in valformats:
-                                errors.append((self.get_lineno(section,keyword),"%s not valid in section [%s] -- only valid in %s sections."%(keyword,section,make_sections(valformats))))
-                            if vals != None and value not in vals:
-                                errors.append((self.get_lineno(section,keyword),"%s not a valid value for %s"%(value,keyword)))
-
-                    ## skipping output_filename_safepattern
-                    ## regex--not used with plugin and this isn't
-                    ## used with CLI/web yet.
-
-                except Exception as e:
-                    errors.append((self.get_lineno(section,keyword),"Error:%s in (%s:%s)"%(e,keyword,value)))
-
-        return errors
-
-#### methods for fetching.  Moved here from base_adapter when
-#### *_filelist feature was added.
-
-    @staticmethod
-    def get_empty_cookiejar():
-        return cl.LWPCookieJar()
-
-    @staticmethod
-    def get_empty_pagecache():
-        return {}
-
-    def get_cookiejar(self):
-        return self.cookiejar
-
-    def set_cookiejar(self,cj):
-        self.cookiejar = cj
-        saveheaders = self.opener.addheaders
-        self.opener = u2.build_opener(u2.HTTPCookieProcessor(self.cookiejar),GZipProcessor())
-        self.opener.addheaders = saveheaders
-
-    def load_cookiejar(self,filename):
-        '''
-        Needs to be called after adapter create, but before any fetches
-        are done. Takes file *name*.
-        '''
-        self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True)
-
-    def get_pagecache(self):
-        return self.pagecache
-
-    def set_pagecache(self,d):
-        self.pagecache=d
-
-    def _get_cachekey(self, url, parameters=None, headers=None):
-        keylist=[url]
-        if parameters != None:
-            keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(parameters.items())))
-        if headers != None:
-            keylist.append('&'.join('{0}={1}'.format(key, val) for key, val in sorted(headers.items())))
-        return '?'.join(keylist)
-
-    def _has_cachekey(self,cachekey):
-        return self.use_pagecache and cachekey in self.get_pagecache()
-
-    def _get_from_pagecache(self,cachekey):
-        if self.use_pagecache:
-            return self.get_pagecache().get(cachekey)
-        else:
-            return None
-
-    def _set_to_pagecache(self,cachekey,data,redirectedurl):
-        if self.use_pagecache:
-            self.get_pagecache()[cachekey] = (data,redirectedurl)
-
-
-## website encoding(s)--in theory, each website reports the character
-## encoding they use for each page. In practice, some sites report it
-## incorrectly. Each adapter has a default list, usually "utf8,
-## Windows-1252" or "Windows-1252, utf8". The special value 'auto'
-## will call chardet and use the encoding it reports if it has +90%
-## confidence. 'auto' is not reliable. 1252 is a superset of
-## iso-8859-1. Most sites that claim to be iso-8859-1 (and some that
-## claim to be utf8) are really windows-1252.
-    def _decode(self,data):
-        decode = self.getConfigList('website_encodings',
-                                    default=["utf8",
-                                             "Windows-1252",
-                                             "iso-8859-1"])
-        for code in decode:
-            try:
-                #print code
-                if code == "auto":
-                    if not chardet:
-                        logger.info("chardet not available, skipping 'auto' encoding")
-                        continue
-                    detected = chardet.detect(data)
-                    #print detected
-                    if detected['confidence'] > 0.9:
-                        code=detected['encoding']
-                    else:
-                        continue
-                return data.decode(code)
-            except:
-                logger.debug("code failed:"+code)
-                pass
-        logger.info("Could not decode story, tried: %s. Stripping non-ASCII."%decode)
-        return "".join([x for x in data if ord(x) < 128])
-
-    # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
-    def _postUrl(self, url,
-                 parameters={},
-                 headers={},
-                 extrasleep=None,
-                 usecache=True):
-        '''
-        When should cache be cleared or not used? logins...
-
-        extrasleep is primarily for the ffnet adapter, which has extra
-        sleeps. Passed into fetches so it can be bypassed on
-        cache hits.
-        '''
-        cachekey=self._get_cachekey(url, parameters, headers)
-        if usecache and self._has_cachekey(cachekey):
-            logger.debug("#####################################\npagecache(POST) HIT: %s"%safe_url(cachekey))
-            data,redirecturl = self._get_from_pagecache(cachekey)
-            return data
-
-        logger.debug("#####################################\npagecache(POST) MISS: %s"%safe_url(cachekey))
-        self.do_sleep(extrasleep)
-
-        ## u2.Request assumes POST when data!=None. Also assumes data
-        ## is application/x-www-form-urlencoded.
-        if 'Content-type' not in headers:
-            headers['Content-type']='application/x-www-form-urlencoded'
-        if 'Accept' not in headers:
-            headers['Accept']="text/html,*/*"
-        req = u2.Request(url,
-                         data=urllib.urlencode(parameters),
-                         headers=headers)
-
-        ## Specific UA because too many sites are blocking the default python UA.
-        logger.debug("user_agent:%s"%self.getConfig('user_agent'))
-        self.opener.addheaders = [('User-Agent', self.getConfig('user_agent')),
-                                  ('X-Clacks-Overhead','GNU Terry Pratchett')]
-
-        data = self._decode(self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read())
-        self._set_to_pagecache(cachekey,data,url)
-        return data
-
-    def _fetchUrl(self, url,
-                  parameters=None,
-                  usecache=True,
-                  extrasleep=None):
-        return self._fetchUrlOpened(url,
-                                    parameters,
-                                    usecache,
-                                    extrasleep)[0]
-
-    def _fetchUrlRawOpened(self, url,
-                           parameters=None,
-                           extrasleep=None,
-                           usecache=True):
-        '''
-        When should cache be cleared or not used? logins...
-
-        extrasleep is primarily for the ffnet adapter, which has extra
-        sleeps. Passed into fetches so it can be bypassed on
-        cache hits.
-        '''
-        cachekey=self._get_cachekey(url, parameters)
-        if usecache and self._has_cachekey(cachekey):
-            logger.debug("#####################################\npagecache(GET) HIT: %s"%safe_url(cachekey))
-            data,redirecturl = self._get_from_pagecache(cachekey)
-            class FakeOpened:
-                def __init__(self,data,url):
-                    self.data=data
-                    self.url=url
-                def geturl(self): return self.url
-                def read(self): return self.data
-            return (data,FakeOpened(data,redirecturl))
-
-        logger.debug("#####################################\npagecache(GET) MISS: %s"%safe_url(cachekey))
-        self.do_sleep(extrasleep)
-
-        ## Specific UA because too many sites are blocking the default python UA.
-        self.opener.addheaders = [('User-Agent', self.getConfig('user_agent')),
-                                  ## starslibrary.net throws a "HTTP
-                                  ## Error 403: Bad Behavior" over the
-                                  ## X-Clacks-Overhead. Which is
-                                  ## both against the standard and
-                                  ## rather a dick-move.
-                                  #('X-Clacks-Overhead','GNU Terry Pratchett'),
-                                  ]
-
-        if parameters != None:
-            opened = self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters),float(self.getConfig('connect_timeout',30.0)))
-        else:
-            opened = self.opener.open(url.replace(' ','%20'),None,float(self.getConfig('connect_timeout',30.0)))
-        data = opened.read()
-        self._set_to_pagecache(cachekey,data,opened.url)
-
-        return (data,opened)
-
-    def set_sleep(self,val):
-        logger.debug("\n===========\n set sleep time %s\n==========="%val)
-        self.override_sleep = val
-
-    def do_sleep(self,extrasleep=None):
-        if extrasleep:
-            time.sleep(float(extrasleep))
-        if self.override_sleep:
-            time.sleep(float(self.override_sleep))
-        elif self.getConfig('slow_down_sleep_time'):
-            time.sleep(float(self.getConfig('slow_down_sleep_time')))
-
-    # parameters is a dict()
-    def _fetchUrlOpened(self, url,
-                        parameters=None,
-                        usecache=True,
-                        extrasleep=None):
-
-        excpt=None
-        for sleeptime in [0, 0.5, 4, 9]:
-            time.sleep(sleeptime)
-            try:
-                (data,opened)=self._fetchUrlRawOpened(url,
-                                                      parameters=parameters,
-                                                      usecache=usecache,
-                                                      extrasleep=extrasleep)
-                return (self._decode(data),opened)
-            except u2.HTTPError, he:
-                excpt=he
-                if he.code in (403,404,410):
-                    logger.warn("Caught an exception reading URL: %s Exception %s."%(unicode(safe_url(url)),unicode(he)))
-                    break # break out on 403, 404 and 410
-            except Exception, e:
-                excpt=e
-                logger.warn("Caught an exception reading URL: %s sleeptime(%s) Exception %s."%(unicode(safe_url(url)),sleeptime,unicode(e)))
-
-        logger.error("Giving up on %s"%safe_url(url))
-        logger.debug(excpt, exc_info=True)
-        raise(excpt)
-
-
-# extended by adapter, writer and story for ease of calling configuration.
-class Configurable(object):
-
-    def __init__(self, configuration):
-        self.configuration = configuration
-
-        ## use_pagecache() is on adapters--not all have been updated
-        ## to deal with caching correctly
-        if hasattr(self, 'use_pagecache'):
-            self.configuration.use_pagecache = self.use_pagecache()
-
-    def get_configuration(self):
-        return self.configuration
-
-    def is_lightweight(self):
-        return self.configuration.lightweight
-
-    def addUrlConfigSection(self,url):
-        self.configuration.addUrlConfigSection(url)
-
-    def isListType(self,key):
-        return self.configuration.isListType(key)
-
-    def isValidMetaEntry(self, key):
-        return self.configuration.isValidMetaEntry(key)
-
-    def getValidMetaList(self):
-        return self.configuration.getValidMetaList()
-
-    def hasConfig(self, key):
-        return self.configuration.hasConfig(key)
-
-    def has_config(self, sections, key):
-        return self.configuration.has_config(sections, key)
-
-    def getConfig(self, key, default=""):
-        return self.configuration.getConfig(key,default)
-
-    def get_config(self, sections, key, default=""):
-        return self.configuration.get_config(sections,key,default)
-
-    def getConfigList(self, key, default=[]):
-        return self.configuration.getConfigList(key,default)
-
-    def get_config_list(self, sections, key):
-        return self.configuration.get_config_list(sections,key)
-
-    def get_label(self, entry):
-        if self.hasConfig(entry+"_label"):
-            label=self.getConfig(entry+"_label")
-        elif entry in titleLabels:
-            label=titleLabels[entry]
-        else:
-            label=entry.title()
-        return label
-
-    def do_sleep(self,extrasleep=None):
-        return self.configuration.do_sleep(extrasleep)
-
-    def set_decode(self,decode):
-        self.configuration.decode = decode
-
-    def _postUrl(self, url,
-                 parameters={},
-                 headers={},
-                 extrasleep=None,
-                 usecache=True):
-        return self.configuration._postUrl(url,
-                                           parameters,
-                                           headers,
-                                           extrasleep,
-                                           usecache)
-
-    def _fetchUrlRawOpened(self, url,
-                           parameters=None,
-                           extrasleep=None,
-                           usecache=True):
-        return self.configuration._fetchUrlRawOpened(url,
-                                                     parameters,
-                                                     extrasleep,
-                                                     usecache)
-
-    def _fetchUrlOpened(self, url,
-                        parameters=None,
-                        usecache=True,
-                        extrasleep=None):
-        return self.configuration._fetchUrlOpened(url,
-                                                  parameters,
-                                                  usecache,
-                                                  extrasleep)
-
-    def _fetchUrl(self, url,
-                  parameters=None,
-                  usecache=True,
-                  extrasleep=None):
-        return self._fetchUrlOpened(url,
-                                    parameters,
-                                    usecache,
-                                    extrasleep)[0]
-
-    def _fetchUrlRaw(self, url,
-                     parameters=None,
-                     extrasleep=None,
-                     usecache=True):
-        return self._fetchUrlRawOpened(url,
-                                       parameters,
-                                       extrasleep,
-                                       usecache)[0]
-
-
-# .? for AO3's ']' in param names.
-safe_url_re = re.compile(r'(?P<pre>(password|name|login).?=)[^&]*(?P<post>&|$)',flags=re.MULTILINE)
-def safe_url(url):
-    # return url with password attr (if present) obscured.
-    return re.sub(safe_url_re,r'\g<pre>XXXXXXXX\g<post>',url)
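For reference, the section-priority cascade described in the deleted file's
header comment resolves like this. A minimal sketch, not FanFicFare code: the
section names, the sectionslist order, and the get_config() helper below are
illustrative stand-ins for Configuration.sectionslist and
Configuration.get_config(), simplified to a single plain key lookup.

    import ConfigParser

    parser = ConfigParser.SafeConfigParser()
    # same example sections as the header comment in the deleted file
    for section, entries in [('defaults', 'category,genre,status'),
                             ('epub', 'category,genre,status,datePublished,dateUpdated,dateCreated'),
                             ('www.whofic.com:epub', 'category,genre,status,datePublished')]:
        parser.add_section(section)
        parser.set(section, 'titlepage_entries', entries)

    # hi-pri first, lo-pri last, mirroring Configuration.sectionslist
    sectionslist = ['overrides',
                    'www.whofic.com:epub',
                    'epub',
                    'www.whofic.com',
                    'defaults']

    def get_config(key, default=""):
        # the first (highest-priority) section that defines the key wins
        for section in sectionslist:
            try:
                return parser.get(section, key)
            except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
                pass
        return default

    print get_config('titlepage_entries')
    # -> category,genre,status,datePublished   ([site:format] beats [format] and [defaults])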