Tweak chromagnon/cacheParse.py

This commit is contained in:
Jim Miller 2021-01-10 19:09:05 -06:00
parent 0be96953af
commit 9d4f587e23

View file

@@ -40,6 +40,7 @@ import struct
import sys
import re
import brotli
import time
from . import csvOutput
from . import SuperFastHash
@@ -112,12 +113,16 @@ class ChromeCache(object):
def __init__(self, path):
    """Parse the cache at ``path`` and index its fanfiction.net entries.

    ``self.cache`` holds whatever ``parse(path)`` returns.
    ``self.hash_cache`` maps each entry's key string (``entry.keyToStr()``)
    to the entry, for every entry whose key contains ``'fanfiction.net'``.
    Each such entry is additionally stored under a normalized key: a key of
    the form ``https://www.fanfiction.net/s/<digits>/<digits>/<anything>``
    is truncated right after the second numeric path segment, so a lookup
    by the shortened URL also finds the entry.

    Args:
        path: Passed straight through to ``parse`` (presumably the
            location of the Chrome cache -- confirm against ``parse``).
    """
    self.cache = parse(path)
    self.hash_cache = {}
    # Compiled once, outside the loop. The dots are escaped so they only
    # match a literal '.', not an arbitrary character.
    story_url = re.compile(r'^(https://www\.fanfiction\.net/s/\d+/\d+/).+$')
    for entry in self.cache:
        key = entry.keyToStr()
        if 'fanfiction.net' not in key:
            continue
        self.hash_cache[key] = entry
        # When the pattern does not match, sub() returns the key unchanged
        # and this merely re-stores the same mapping (no harm); otherwise
        # it adds the entry under the normalized URL. If several entries
        # normalize to the same URL, the last one iterated wins.
        self.hash_cache[story_url.sub(r'\1', key)] = entry
def get_cached_file(self,url):
if url in self.hash_cache: