Adding browsercache_sqldb for Yet Another caching scheme in Chrome. #1341

This commit is contained in:
Jim Miller 2026-05-06 13:22:22 -05:00
parent b41a633821
commit cb77b12754
2 changed files with 113 additions and 1 deletions

View file

@ -22,6 +22,7 @@ from .base_browsercache import BaseBrowserCache, CACHE_DIR_CONFIG
from .browsercache_simple import SimpleCache
from .browsercache_blockfile import BlockfileCache
from .browsercache_firefox2 import FirefoxCache2
from .browsercache_sqldb import SqldbCache
import logging
logger = logging.getLogger(__name__)
@ -34,7 +35,7 @@ class BrowserCache(object):
def __init__(self, site, getConfig_fn, getConfigList_fn):
"""Constructor for BrowserCache"""
# import of child classes have to be inside the def to avoid circular import error
for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2, SqldbCache]:
self.browser_cache_impl = browser_cache_class.new_browser_cache(site,
getConfig_fn,
getConfigList_fn)

View file

@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
# Copyright 2026 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import os
import apsw
import ctypes
from .base_chromium import BaseChromiumCache
from .chromagnon import SuperFastHash
import logging
logger = logging.getLogger(__name__)
class SqldbCache(BaseChromiumCache):
    """Class to access data stream in Chrome Disk Sqldb Cache format cache files

    A directory is treated as this kind of cache when it contains both
    an "index" file and an "sqldb0" file; entries are read from the
    sqlite database in "sqldb0".
    """

    def __init__(self, *args, **kargs):
        """Constructor for SqldbCache; all arguments go to the base class."""
        super(SqldbCache,self).__init__(*args, **kargs)
        logger.debug("Using SqldbCache")

    # def scan_cache_keys(self):
    ## XXX will impl a scan if and when needed.  It's a lot easier
    ## to peek inside an sqlite

    @staticmethod
    def is_cache_dir(cache_dir):
        """Return True only if a directory is a valid Cache for this class

        Only the presence of the directory and of the "index" and
        "sqldb0" files is checked; the database itself is not opened.
        """
        if not os.path.isdir(cache_dir):
            logger.debug("Cache dir not found")
            return False
        index_path = os.path.join(cache_dir, "index")
        if not os.path.isfile(index_path):
            logger.debug("index file not found")
            return False
        sqldb0_path = os.path.join(cache_dir, "sqldb0")
        if not os.path.isfile(sqldb0_path):
            logger.debug("sqldb0 file not found")
            return False
        ## XXX check schema of db?
        return True

    @staticmethod
    def _parse_head(head):
        """
        Pull the Location and Content-Encoding values out of a stored
        header blob.

        The blob is scanned from the first b'HTTP' up to the first
        double NUL (or to the end if there is none) and split on NUL
        into header lines.

        returns (location, encoding).  location is '' and encoding is
        None when the header is absent or head cannot be parsed.
        """
        location, encoding = '', None
        if not head:
            return location, encoding
        start = head.find(b'HTTP')
        if start < 0:
            logger.debug("no HTTP status line found in head")
            return location, encoding
        head = head[start:]
        end = head.find(b'\x00\x00')
        if end >= 0:
            head = head[:end]
        # logger.debug(head)
        for line in head.split(b'\0'):
            logger.debug(line)
            ## Compare the header *name* exactly.  A substring test
            ## on the whole line would also match Content-Location,
            ## or any header whose value mentions 'location'.
            name, _sep, value = line.partition(b':')
            name = name.strip().lower()
            if name == b'content-encoding':
                encoding = value.strip().lower()
                logger.debug("encoding from header:%s"%encoding)
            elif name == b'location':
                location = value.strip()
                logger.debug("location from header:%s"%location)
        return location, encoding

    ## XXX others uses share_open() - will sqlite open work concurrently?
    def get_data_key_impl(self, url, key):
        """
        returns location, entry age(unix epoch), content-encoding and
        raw(compressed) data

        Rows are looked up by the signed 32 bit hash of key only; url
        is not used by this implementation.  When several rows share
        the hash, the one with the greatest age value wins and its
        headers and blob are the ones returned.  Returns None when no
        row is found or the chosen row has an empty blob.
        """
        location, age, encoding, data = '', None, None, None
        qstr = 'SELECT last_used, head, blob FROM resources as r join blobs as b on b.res_id=r.res_id where cache_key_hash=?'
        cache_key_hash = _key_hash(key)
        logger.debug("           key:%s"%key)
        logger.debug("cache_key_hash:%s"%cache_key_hash)
        ## XXX worth optimizing to keep sql conn open?
        ## NOTE(review): per the APSW docs the Connection context
        ## manager scopes a transaction rather than closing the
        ## connection -- confirm that is the intent here.
        with apsw.Connection(os.path.join(self.cache_dir, "sqldb0")) as db:
            for last, head, blob in db.execute(qstr,[cache_key_hash]):
                row_age = self.make_age(last)
                if age is not None and row_age < age:
                    ## continue, not break: rows are not ordered, so
                    ## a newer row may still follow this one.
                    logger.debug("skipping an older row for same hash")
                    continue
                age = row_age
                logger.debug("age from last_used:%s"%age)
                ## cheesy way to pull out the http headers, inspired
                ## by equal cheese in chromagnon/cacheData.py.  Only
                ## actually care about location & content-encoding,
                ## ignore the rest.  Both values are replaced for
                ## every accepted row so nothing leaks over from a
                ## row that has been superseded.
                location, encoding = self._parse_head(head)
                ## XXX might need entry age from header, too.
                ## Hoping db last_used is equiv.
                data = blob
        if data:
            return (location, age, encoding, data)
        else:
            return None
## Calculate SuperFastHash of the key, folded into the signed 32 bit
## range because that is how the sql db saved it.
def _key_hash(key):
    """Return SuperFastHash of key reinterpreted as a signed 32 bit int."""
    low32 = SuperFastHash.superFastHash(key) & 0xFFFFFFFF
    # Two's complement: values with the top bit set become negative.
    if low32 & 0x80000000:
        return low32 - 0x100000000
    return low32