# Reader for browser caches in the Chromium "Simple Cache" on-disk format.
import os
import struct
import hashlib
import glob
import time
import re
import traceback

from . import BaseBrowserCache, BrowserCacheException
from ..six import ensure_binary, ensure_text
from .share_open import share_open

import logging
logger = logging.getLogger(__name__)


class SimpleCacheException(BrowserCacheException):
    pass


# NOTE(review): this copy of the file was whitespace-mangled and a span was
# lost between the struct format string below and the body of
# map_cache_keys() (it presumably held magic-number constants and the
# class header / constructor).  The '<QLLLL' layout is reconstructed from
# the Chromium simple-cache EOF record (net/disk_cache/simple/
# simple_entry_format.h) -- confirm against the upstream project file.
SIMPLE_EOF = struct.Struct('<QLLLL')


class SimpleCache(BaseBrowserCache):
    """Access entries of a Chromium "Simple Cache" format cache directory."""

    def map_cache_keys(self):
        """Scan the cache directory and register a key->entry-file mapping
        for every entry file newer than the configured age limit.

        Side effect: sets self.count to the number of entries mapped.
        """
        if self.age_limit is not None and self.age_limit > 0.0:
            # age_limit is in hours; convert to an mtime cutoff (seconds).
            file_comp_time = time.time() - (self.age_limit*3600)
        else:
            # No limit configured: cutoff of 0 accepts every mtime.
            file_comp_time = 0
        self.count = 0
        if hasattr(os, 'scandir'):
            # scandir() returns cached stat info -- avoids one stat syscall
            # per file compared to the listdir() fallback below.
            logger.debug("using scandir")
            for entry in os.scandir(self.cache_dir):
                self.do_cache_key_entry(entry.path, entry.stat(), file_comp_time)
        else:
            logger.debug("using listdir")
            for en_fl in os.listdir(self.cache_dir):
                en_path = os.path.join(self.cache_dir, en_fl)
                self.do_cache_key_entry(en_path, os.stat(en_path), file_comp_time)
        logger.debug("Read %s entries"%self.count)

    def do_cache_key_entry(self, path, stats, file_comp_time):
        """Register one candidate entry file.

        Only files named like simple-cache entries ("<16 hex chars>_<n>")
        and modified after file_comp_time are considered.  A file that
        fails to parse is logged and skipped, never fatal.
        """
        ## there are some other files in simple cache dir.
        # logger.debug("%s: %s > %s"%(os.path.basename(path),stats.st_mtime,file_comp_time))
        if( re.match(r'^[0-9a-fA-F]{16}_[0-9]+$',os.path.basename(path))
                and stats.st_mtime > file_comp_time ):
            try:
                (cache_url,created) = _get_entry_file_created(path)
                if cache_url:
                    self.add_key_mapping(cache_url,path,created)
                    self.count += 1
            except Exception:
                # Best effort: one unreadable entry must not abort the scan.
                logger.warning("Cache file %s failed to load, skipping."%path)
                logger.debug(traceback.format_exc())

    # key == filename for simple cache
    def get_data_key(self, key):
        """Return the decoded (decompressed) response body for the entry
        file whose path is *key*, honoring its Content-Encoding header."""
        headers = _get_headers(key)
        encoding = headers.get('content-encoding', '').strip().lower()
        try:
            return self.decompress(encoding,_get_data_from_entry_file(key))
        except:
            # logger.debug("\n\n%s\n\n"%key)
            raise

    # def get_data_url(self, url):
    #     """ Return decoded data for specified key (a URL string) or None """
    #     glob_pattern = os.path.join(self.cache_dir, _key_hash(url) + '_?')
    #     # because hash collisions are so rare, this will usually only find zero or one file,
    #     # so there is no real savings to be had by reading the index file
instead of going straight to the entry files # url = ensure_text(url) # for en_fl in glob.glob(glob_pattern): # try: # file_key = _validate_entry_file(en_fl) # if file_key == url: # return self.get_data_key(en_fl) # except SimpleCacheException: # pass # return None # Here come the utility functions for the class import codecs def _key_hash(key): """Compute hash of key as used to generate name of cache entry file""" # py2 lacks convenient .hex() method on bytes key = ensure_binary(key) return ensure_text(codecs.encode(hashlib.sha1(key).digest()[7::-1],'hex')) # return hashlib.sha1(key).digest()[7::-1].hex() def _get_entry_file_created(path): with share_open(path, "rb") as entry_file: key = _read_entry_file(path,entry_file) (info_size, flags, request_time, response_time, header_size) = _read_meta_headers(entry_file) # logger.debug("\nkey:%s\n request_time:%s\nresponse_time:%s"%(key,request_time, response_time)) return (key, response_time) def _validate_entry_file(path): with share_open(path, "rb") as entry_file: return _read_entry_file(path,entry_file) def _read_entry_file(path,entry_file): """Validate that a file is a cache entry file, return the URL (key) if valid""" # read from path into SimpleFileHeader, use key_length field to determine size of key, return key as byte string shformat = struct.Struct('