mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Clean up chromagnon code to only what we're using--rest probably wasn't updated completely anyway.
This commit is contained in:
parent
17cd3f3d04
commit
9819e0b214
8 changed files with 34 additions and 787 deletions
|
|
@ -41,33 +41,32 @@ import sys
|
|||
import re
|
||||
import time
|
||||
|
||||
def do_cprofile(func):
    """Decorator that prints the wall-clock duration of every call to *func*.

    Despite the name, cProfile is not involved: the wrapper measures elapsed
    ``time.time()`` and prints ``time:<seconds>`` once the call finishes,
    whether it returned normally or raised.

    func: the callable to time.
    Returns the wrapping function, which forwards all arguments to *func*
    and returns its result unchanged.
    """
    import functools  # local import: only this decorator needs it

    @functools.wraps(func)
    def profiled_func(*args, **kwargs):
        start = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            # Compute the elapsed time here so that a call which raises still
            # reports its duration instead of the raw start timestamp.
            print("time:%s" % (time.time() - start))
    return profiled_func
|
||||
# def do_cprofile(func):
|
||||
# def profiled_func(*args, **kwargs):
|
||||
# t=0
|
||||
# try:
|
||||
# t = time.time()
|
||||
# result = func(*args, **kwargs)
|
||||
# t = time.time() - t
|
||||
# return result
|
||||
# finally:
|
||||
# print("time:%s"%t)
|
||||
# return profiled_func
|
||||
|
||||
try:
|
||||
from brotli import decompress
|
||||
@do_cprofile
|
||||
# @do_cprofile
|
||||
def brotli_decompress(inbuf):
|
||||
return decompress(inbuf)
|
||||
except:
|
||||
# Calibre doesn't include brotli, so use packaged brotlipython
|
||||
# which is waaaay slower, but pure python.
|
||||
from brotlipython import brotlidec
|
||||
@do_cprofile
|
||||
# @do_cprofile
|
||||
def brotli_decompress(inbuf):
|
||||
# wants the output, too, but returns it
|
||||
return brotlidec(inbuf,[])
|
||||
|
||||
from . import csvOutput
|
||||
from . import SuperFastHash
|
||||
|
||||
from .cacheAddress import CacheAddress
|
||||
|
|
@ -76,47 +75,23 @@ from .cacheData import CacheData
|
|||
from .cacheEntry import CacheEntry
|
||||
from six.moves import range
|
||||
|
||||
class ChromeCache(object):
|
||||
def __init__(self,path):
|
||||
self.path = os.path.abspath(path)
|
||||
self.cacheBlock = CacheBlock(os.path.join(path, "index"))
|
||||
|
||||
def parse(path, urls=None):
|
||||
"""
|
||||
Reads the whole cache and store the collected data in a table
|
||||
or find out if the given list of urls is in the cache. If yes it
|
||||
return a list of the corresponding entries.
|
||||
"""
|
||||
# Verifying that the path end with / (What happen on windows?)
|
||||
path = os.path.abspath(path)
|
||||
cacheBlock = CacheBlock(os.path.join(path, "index"))
|
||||
# Checking type
|
||||
if self.cacheBlock.type != CacheBlock.INDEX:
|
||||
raise Exception("Invalid Index File")
|
||||
|
||||
# Checking type
|
||||
if cacheBlock.type != CacheBlock.INDEX:
|
||||
raise Exception("Invalid Index File")
|
||||
|
||||
index = open(os.path.join(path, "index"), 'rb')
|
||||
|
||||
# Skipping Header
|
||||
index.seek(92*4)
|
||||
|
||||
cache = []
|
||||
# If no url is specified, parse the whole cache
|
||||
if urls == None:
|
||||
for key in range(cacheBlock.tableSize):
|
||||
raw = struct.unpack('I', index.read(4))[0]
|
||||
if raw != 0:
|
||||
entry = CacheEntry(CacheAddress(raw, path=path))
|
||||
# Checking if there is a next item in the bucket because
|
||||
# such entries are not stored in the Index File so they will
|
||||
# be ignored during iterative lookup in the hash table
|
||||
while entry.next != 0:
|
||||
cache.append(entry)
|
||||
entry = CacheEntry(CacheAddress(entry.next, path=path))
|
||||
cache.append(entry)
|
||||
else:
|
||||
# Find the entry for each url
|
||||
for url in urls:
|
||||
url = bytes(url,'utf8')
|
||||
# Compute the key and seeking to it
|
||||
hash = SuperFastHash.superFastHash(url)
|
||||
key = hash & (cacheBlock.tableSize - 1)
|
||||
def get_cache_entry(self,url):
|
||||
url = bytes(url,'utf8')
|
||||
# Compute the key and seeking to it
|
||||
# print("url:%s"%url)
|
||||
hash = SuperFastHash.superFastHash(url)
|
||||
# print("superFastHash:%s"%hash)
|
||||
key = hash & (self.cacheBlock.tableSize - 1)
|
||||
with open(os.path.join(self.path, "index"), 'rb') as index:
|
||||
index.seek(92*4 + key*4)
|
||||
|
||||
addr = struct.unpack('I', index.read(4))[0]
|
||||
|
|
@ -126,29 +101,16 @@ def parse(path, urls=None):
|
|||
|
||||
# Follow the chained list in the bucket
|
||||
else:
|
||||
entry = CacheEntry(CacheAddress(addr, path=path))
|
||||
entry = CacheEntry(CacheAddress(addr, path=self.path))
|
||||
while entry.hash != hash and entry.next != 0:
|
||||
entry = CacheEntry(CacheAddress(entry.next, path=path))
|
||||
entry = CacheEntry(CacheAddress(entry.next, path=self.path))
|
||||
if entry.hash == hash:
|
||||
cache.append(entry)
|
||||
index.close()
|
||||
return cache
|
||||
|
||||
class ChromeCache(object):
|
||||
def __init__(self,path):
|
||||
self.cache = parse(path)
|
||||
self.hash_cache = {}
|
||||
# t = time.time()
|
||||
for entry in self.cache:
|
||||
key = entry.keyToStr()
|
||||
if 'fanfiction.net' not in key:
|
||||
continue
|
||||
self.hash_cache[key] = entry
|
||||
# print("======:%s"%(time.time()-t))
|
||||
return entry
|
||||
|
||||
def get_cached_file(self,url):
|
||||
if url in self.hash_cache:
|
||||
entry = self.hash_cache[url]
|
||||
entry = self.get_cache_entry(url)
|
||||
if entry:
|
||||
# entry = self.hash_cache[url]
|
||||
for i in range(len(entry.data)):
|
||||
if entry.data[i].type == CacheData.UNKNOWN:
|
||||
# Extracting data into a file
|
||||
|
|
@ -163,157 +125,3 @@ class ChromeCache(object):
|
|||
data = brotli_decompress(data)
|
||||
return data
|
||||
return None
|
||||
|
||||
def _exportEntryData(page, entry, i, outpath, page_basename):
    """Save data stream *i* of *entry* under *outpath* and link it from *page*.

    gzip- and brotli-encoded payloads are additionally written decoded to
    ``<name>u``; every other payload is linked as saved.
    """
    # Extracting data into a file
    name = hex(entry.hash) + "_" + str(i)
    raw_path = os.path.join(outpath, name)
    entry.data[i].save(raw_path)

    headers = None
    if entry.httpHeader is not None:
        headers = entry.httpHeader.headers

    encoding = None
    if headers is not None and b'content-encoding' in headers:
        encoding = headers[b'content-encoding']

    if encoding == b"gzip":
        # XXX Highly inefficient !!!!!
        try:
            with gzip.open(raw_path, 'rb') as packed:
                with open(raw_path + "u", 'wb') as unpacked:
                    unpacked.write(packed.read())
            page.write('<a href="%su">%s</a>'%(name, page_basename))
        except (IOError, EOFError):
            page.write("Something wrong happened while unzipping")
    elif encoding == b"br":
        try:
            with open(raw_path, 'rb') as packed:
                with open(raw_path + "u", 'wb') as unpacked:
                    # Use this module's brotli_decompress helper: the bare
                    # name ``brotli`` is never bound here (only
                    # ``decompress`` is imported from it).
                    unpacked.write(brotli_decompress(packed.read()))
            page.write('<a href="%su">%s</a>'%(name, page_basename))
        except Exception:
            # The two brotli back ends raise different error types, so the
            # failure is reported on the page rather than aborting the export.
            page.write("Something wrong happened while unzipping")
    else:
        # Not encoded (or an encoding we cannot decode): link the raw file.
        page.write('<a href="%s">%s</a>'%(name ,
                   entry.keyToStr().split('/')[-1]))

    # If it is a picture, display it
    if headers is not None:
        if b'content-type' in headers and\
           b"image" in headers[b'content-type']:
            page.write('<br /><img src="%s">'%(name))


def exportToHTML(cache, outpath):
    """
    Export the cache in html

    cache: iterable of cache entries to export
    outpath: directory receiving the export; created when it does not exist

    Writes ``index.html`` with one link per entry, one ``<hash>.html`` detail
    page per entry, and the extracted data streams next to them.

    NOTE(review): keys and header values are written into the HTML without
    escaping, as before -- confirm whether the output needs escaping.
    """

    # Checking that the directory exists and is writable
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    outpath = os.path.abspath(outpath)

    with open(os.path.join(outpath,"index.html"), 'w') as index:
        index.write("<UL>")

        for entry in cache:
            # Adding a link in the index
            if entry.keyLength > 100:
                entry_name = entry.keyToStr()[:100] + "..."
            else:
                entry_name = entry.keyToStr()
            index.write('<LI><a href="%08x.html">%s</a></LI>'%(entry.hash, entry_name))
            # We handle the special case where entry_name ends with a slash
            parts = entry_name.split('/')
            page_basename = parts[-2] if entry_name.endswith('/') else parts[-1]

            # Creating the entry page
            with open(os.path.join(outpath,"%08x.html"%entry.hash), 'w') as page:
                page.write("""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
</head>
<body>""")

                # Details of the entry
                page.write("<b>Hash</b>: 0x%08x<br />"%entry.hash)
                page.write("<b>Usage Counter</b>: %d<br />"%entry.usageCounter)
                page.write("<b>Reuse Counter</b>: %d<br />"%entry.reuseCounter)
                page.write("<b>Creation Time</b>: %s<br />"%entry.creationTime)
                page.write("<b>Key</b>: %s<br>"%entry.keyToStr())
                page.write("<b>State</b>: %s<br>"%CacheEntry.STATE[entry.state])

                page.write("<hr>")
                ## entry.data normally 2 or 1
                ## 2 for headers and data, 1 for headers only.
                if len(entry.data) == 0:
                    page.write("No data associated with this entry :-(")
                for i in range(len(entry.data)):
                    if entry.data[i].type == CacheData.UNKNOWN:
                        _exportEntryData(page, entry, i, outpath, page_basename)
                    # HTTP Header
                    else:
                        page.write("<u>HTTP Header</u><br />")
                        for key, value in entry.data[i].headers.items():
                            page.write("<b>%s</b>: %s<br />"%(key, value))
                    page.write("<hr>")
                page.write("</body></html>")

        index.write("</UL>")
|
||||
|
||||
def exportTol2t(cache):
    """
    Export the cache in CSV log2timeline compliant format

    cache: iterable of cache entries; each contributes one CSV row.
    The header row plus one row per entry are handed to
    ``csvOutput.csvOutput``.
    """

    output = [["date",
               "time",
               "timezone",
               "MACB",
               "source",
               "sourcetype",
               "type",
               "user",
               "host",
               "short",
               "desc",
               "version",
               "filename",
               "inode",
               "notes",
               "format",
               "extra"]]

    for entry in cache:
        created = entry.creationTime
        date = created.date().strftime("%m/%d/%Y")
        # Named clock_time so the module-level ``time`` import is not shadowed.
        clock_time = created.time()
        # TODO get timezone
        timezone = 0
        short = entry.keyToStr()
        descr = "Hash: 0x%08x" % entry.hash
        descr += " Usage Counter: %d" % entry.usageCounter
        if entry.httpHeader is not None:
            headers = entry.httpHeader.headers
            # Header names are looked up as bytes elsewhere in this module
            # (b'content-type'); try that first and keep the str key as a
            # fallback so the MIME type is not silently dropped.
            for header_name in (b'content-type', 'content-type'):
                if header_name in headers:
                    mime = headers[header_name]
                    if not isinstance(mime, str):
                        mime = mime.decode('utf-8', 'replace')
                    descr += " MIME: %s" % mime
                    break

        output.append([date,
                       clock_time,
                       timezone,
                       "MACB",
                       "WEBCACHE",
                       "Chrome Cache",
                       "Cache Entry",
                       "-",
                       "-",
                       short,
                       descr,
                       "2",
                       "-",
                       "-",
                       "-",
                       "-",
                       "-",
                       ])

    csvOutput.csvOutput(output)
|
||||
|
|
|
|||
|
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Classical Output Module
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
|
||||
def classicalOutput(queryResult, separator="\t"):
    """
    Display the data separated by the specified separator

    queryResult: iterable of rows, each an iterable of cells
    separator: text written after every cell, including the last of a row

    Every row is written to standard output followed by a newline. Cells
    that are not already text are converted with ``str()`` so that numbers,
    dates and similar values no longer make ``write()`` raise TypeError.
    """
    # Both text types, so Python 2 unicode cells are passed through untouched.
    text_types = (type(""), type(u""))

    for line in queryResult:
        for element in line:
            if not isinstance(element, text_types):
                element = str(element)
            sys.stdout.write(element)
            sys.stdout.write(separator)
        sys.stdout.write('\n')
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Column Output Module
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
from six.moves import range
|
||||
def columnOutput(queryResult, separator=' '):
    """
    Display the data in columns

    queryResult: sequence of rows; the first row fixes the number of columns
    separator: text placed between two columns

    Each column is left-aligned and padded to the width of its longest cell,
    then every row is printed on its own line. Nothing is printed for an
    empty result.
    """
    if len(queryResult) == 0:
        return

    # Finding width of columns
    size = [max(len(str(line[i])) for line in queryResult)
            for i in range(len(queryResult[0]))]
    # Generating format string. Joining (rather than appending the separator
    # and slicing it off again) also works for an empty separator, and the
    # escaping keeps a '%' inside the separator from being read as a format
    # directive.
    string = separator.replace('%', '%%').join("%%-%ds" % x for x in size)
    for line in queryResult:
        print(string % tuple(line))
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
CSV Output Module
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import csv
|
||||
import sys
|
||||
|
||||
def csvOutput(queryResult, separator=',', quote='"'):
    """
    Display the data according to csv format

    Rows of queryResult are written to standard output, delimited by
    separator and quoted with quote only where a cell requires it.
    """
    writer = csv.writer(sys.stdout,
                        quoting=csv.QUOTE_MINIMAL,
                        quotechar=quote,
                        delimiter=separator)
    writer.writerows(queryResult)
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Parse the Chrome Download Table History File
|
||||
It's a SQLite3 table
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import datetime
|
||||
import sqlite3
|
||||
import sys
|
||||
import six
|
||||
|
||||
def parse(filename, urlLength):
    """
    filename: path to the history file
    urlLength: maximum url length to display

    Returns a list with one DownloadEntry per row of the ``downloads`` table.
    Prints a diagnostic and exits the process (``sys.exit``) when the
    database cannot be opened.
    """

    # Connecting to the DB
    try:
        history = sqlite3.connect(filename)
    except sqlite3.Error as error:
        print("==> Error while opening the history file !")
        # Exceptions have no ``.message`` attribute on Python 3; printing the
        # exception itself yields the same text.
        print("==> Details :", error)
        sys.exit("==> Exiting...")

    try:
        # Retrieving all useful data
        result = history.execute("SELECT id, "
                                 "full_path, "
                                 "url, "
                                 "start_time, "
                                 "received_bytes, "
                                 "total_bytes, "
                                 "state "
                                 "FROM downloads;")
        return [DownloadEntry(line, urlLength) for line in result]
    finally:
        # Release the database handle even when a row fails to parse.
        history.close()
|
||||
|
||||
class DownloadEntry(object):
    """Object to store download entries"""
    # Maps the short column codes accepted by columnToStr to attribute names.
    COLUMN_STR = {'st': "startTime",
                  'p': "path",
                  'u': "url",
                  'rb': "receivedBytes",
                  'tb': "totalBytes",
                  'pt': "percentReceived",
                  's': "state"}
    # Human-readable labels, indexed by the numeric state stored in the row.
    STATE_STR = ["In Progress",
                 "Complete",
                 "Cancelled",
                 "Removing",
                 "Interrupted"]

    def __init__(self, item, urlLength):
        """Parse raw input

        item: row as selected by parse(): (id, full_path, url, start_time,
              received_bytes, total_bytes, state)
        urlLength: maximum url length to keep; 0 or less disables truncation
        """
        self.path = item[1]
        url = item[2]
        if urlLength > 0 and len(url) > urlLength:
            # Leave room for the ellipsis. Clamp at 0 so that a limit below 3
            # does not become a negative slice bound that keeps almost the
            # whole url.
            url = url[:max(urlLength - 3, 0)] + "..."
        self.url = url
        # start_time counts microseconds from 1601-01-01.
        self.startTime = datetime.datetime(1601, 1, 1) + \
                         datetime.timedelta(microseconds=item[3])
        self.receivedBytes = item[4]
        self.totalBytes = item[5]
        state = item[6]
        if 0 <= state < len(DownloadEntry.STATE_STR):
            self.state = DownloadEntry.STATE_STR[state]
        else:
            # Codes outside the table used to raise IndexError, or to pick a
            # wrong label for negative values.
            self.state = "Unknown (%s)" % state
        if int(item[5]) == 0:
            self.percentReceived = "0%"
        else:
            self.percentReceived = "%d%%" % \
                                   int(float(item[4])/float(item[5])*100)

    def columnToStr(self, column):
        """Returns column content specified by argument"""
        return six.text_type(getattr(self, DownloadEntry.COLUMN_STR[column]))
|
||||
|
|
@ -1,178 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Parse the Chrome History File
|
||||
It's a SQLite3 file
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import datetime
|
||||
import re
|
||||
import sqlite3
|
||||
import sys
|
||||
|
||||
from . import cacheParse
|
||||
import six
|
||||
|
||||
def parse(filename, start, end, checkCache, cachePath, urlLength):
    """
    filename: path to the history file
    start: beginning of the time window
    end: end of the time window
    checkCache: check if each page in the history is in the cache
    cachePath: path to cache directory
    urlLength: maximum url length to display

    Returns a list of HistoryEntry, ordered by visit time, for the visits
    strictly inside the time window. Prints a diagnostic and exits the
    process (``sys.exit``) when the database cannot be opened.
    """

    # Connecting to the DB
    try:
        history = sqlite3.connect(filename)
    except sqlite3.Error as error:
        print("==> Error while opening the history file !")
        # Exceptions have no ``.message`` attribute on Python 3; printing the
        # exception itself yields the same text.
        print("==> Details :", error)
        sys.exit("==> Exiting...")

    try:
        # Timestamps in the database count microseconds from 1601-01-01.
        reference = datetime.datetime(1601, 1, 1)
        window = (int((start-reference).total_seconds()*1000000),
                  int((end-reference).total_seconds()*1000000))

        # Retrieving all useful data; the bounds are bound as parameters
        # instead of being formatted into the statement.
        result = history.execute("SELECT visits.visit_time, "
                                 "visits.from_visit, "
                                 "visits.transition, "
                                 "urls.url, "
                                 "urls.title, "
                                 "urls.visit_count, "
                                 "urls.typed_count, "
                                 "urls.last_visit_time "
                                 "FROM urls,visits "
                                 "WHERE urls.id=visits.url "
                                 "AND visits.visit_time>? "
                                 "AND visits.visit_time<? "
                                 "ORDER BY visits.visit_time;",
                                 window)

        # Parsing cache
        cache = None
        if checkCache:
            cache = cacheParse.parse(cachePath)

        return [HistoryEntry(line, cache, urlLength) for line in result]
    finally:
        # Release the database handle even when parsing fails.
        history.close()
|
||||
|
||||
class Transition(object):
    """Object representing transition between history pages"""

    # Labels for the core transition type, indexed by the low byte.
    CORE_STRING = ["Link",
                   "Typed",
                   "Auto Bookmark",
                   "Auto Subframe",
                   "Manual Subframe",
                   "Generated",
                   "Start Page",
                   "Form Submit",
                   "Reload",
                   "Keyword",
                   "Keyword Generated"]
    # (bit mask, label) pairs for the qualifier bits above the low byte.
    QUALIFIER_STRING = [(0x01000000, "Forward or Back Button"),
                        (0x02000000, "Address Bar"),
                        (0x04000000, "Home Page"),
                        (0x10000000, "Beginning of Chain"),
                        (0x20000000, "End of Chain"),
                        (0x40000000, "Client Redirection"),
                        (0x80000000, "Server Redirection")]

    def __init__(self, transition):
        """
        Parsing the transition according to
        content/common/page_transition_types.h

        transition: integer transition value; the low byte is kept as the
                    core type and the remaining bits as qualifiers
        """
        self.core = transition & 0xFF
        self.qualifier = transition & 0xFFFFFF00

    def __str__(self):
        """Return the core type label followed by every qualifier that is set"""
        if self.core < len(Transition.CORE_STRING):
            labels = [Transition.CORE_STRING[self.core]]
        else:
            # Core values beyond the table used to raise IndexError.
            labels = ["Unknown (%d)" % self.core]
        for mask, description in Transition.QUALIFIER_STRING:
            if self.qualifier & mask != 0:
                labels.append(description)
        return ", ".join(labels)
|
||||
|
||||
class HistoryEntry(object):
    """Object to store database entries"""
    # Maps the short column codes accepted by columnToStr to attribute names.
    COLUMN_STR = {'vt': "visitTime",
                  'fv': "fromVisit",
                  'tr': "transition",
                  'u': "url",
                  'tl': "title",
                  'vc': "visitCount",
                  'tc': "typedCount",
                  'lv': "lastVisitTime",
                  'cc': "inCache"}

    def __init__(self, item, cache, urlLength):
        """Parse raw input

        item: row as selected by parse(): (visit_time, from_visit,
              transition, url, title, visit_count, typed_count,
              last_visit_time)
        cache: iterable of cache entries exposing keyToStr(), or None to
               skip the cache lookup
        urlLength: maximum url length to keep; 0 or less disables truncation
        """
        # Timestamps count microseconds from 1601-01-01.
        reference = datetime.datetime(1601, 1, 1)
        self.visitTime = reference + datetime.timedelta(microseconds=item[0])
        self.fromVisit = item[1]
        self.transition = Transition(item[2])
        full_url = item[3]
        if urlLength > 0 and len(full_url) > urlLength:
            # Leave room for the ellipsis. Clamp at 0 so that a limit below 3
            # does not become a negative slice bound.
            self.url = full_url[:max(urlLength - 3, 0)] + "..."
        else:
            self.url = full_url
        self.title = item[4]
        self.visitCount = item[5]
        self.typedCount = item[6]
        self.lastVisitTime = reference + datetime.timedelta(microseconds=item[7])

        # Searching in the cache if there is a copy of the page.
        # The untruncated url is compared: the display url may end in "..."
        # and would then never equal a cache key.
        # TODO use a hash table to search instead of heavy exhaustive search
        self.inCache = cache is not None and \
                       any(cached.keyToStr() == full_url for cached in cache)

    def toStr(self):
        """Return the entry's columns (all but inCache) as a list of text"""
        return [six.text_type(self.visitTime),
                six.text_type(self.fromVisit),
                six.text_type(self.transition),
                six.text_type(self.url),
                six.text_type(self.title),
                six.text_type(self.visitCount),
                six.text_type(self.typedCount),
                six.text_type(self.lastVisitTime)]

    def columnToStr(self, column):
        """Returns column content specified by argument"""
        return six.text_type(getattr(self, HistoryEntry.COLUMN_STR[column]))
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
JSON Output Module
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import json
|
||||
|
||||
def jsonOutput(queryResult, separator=''):
    """
    Display the data separated in JSON

    queryResult is serialized with the json module's default settings and
    printed on a single line; separator is accepted but not used.
    """
    encoded = json.dumps(queryResult)
    print(encoded)
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the Chromagon Project nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Parse the Chrome Visited Links
|
||||
Reverse engineered from
|
||||
chrome/common/visitedlink_common.*
|
||||
chrome/browser/visitedlink/visitedlink_*
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import md5
|
||||
import struct
|
||||
import sys
|
||||
from six.moves import range
|
||||
|
||||
VISITED_LINKS_MAGIC = 0x6b6e4c56

def isVisited(path, urls):
    """
    Return the list of urls given in parameter with a boolean information
    about its presence in the given visited links file

    path: path to the visited links file
    urls: iterable of urls (bytes, or text which is encoded as UTF-8)

    Returns a list of (url, bool) tuples in input order.
    Raises Exception("Invalid file") when the magic number does not match.
    """
    import hashlib  # local import: replaces the Python 2-only md5 module

    header_size = 24  # magic, version, length, used items (4 bytes each) + salt
    output = []

    with open(path, 'rb') as f:
        # Checking file type
        magic = struct.unpack('I', f.read(4))[0]
        if magic != VISITED_LINKS_MAGIC:
            raise Exception("Invalid file")

        # Reading header values (version and used item count are not needed)
        _version, length, _usedItems = struct.unpack('III', f.read(12))

        # Reading salt
        salt = f.read(8)

        table_end = length*8 + header_size

        for url in urls:
            if length == 0:
                # Empty table: nothing can be present, and the modulo below
                # would divide by zero.
                output.append((url, False))
                continue

            raw = url if isinstance(url, bytes) else url.encode('utf-8')
            digest = hashlib.md5(salt + raw).digest()

            # The fingerprint is the first 8 digest bytes, least significant
            # byte first.
            fingerprint = struct.unpack('<Q', digest[:8])[0]
            key = fingerprint % length
            start = key*8 + header_size

            # The hash table uses open addressing
            f.seek(start, 0)
            while True:
                # Read unsigned so slots with the top bit set can match the
                # (always non-negative) fingerprint.
                finger = struct.unpack('Q', f.read(8))[0]
                if finger == 0:
                    output.append((url, False))
                    break
                if finger == fingerprint:
                    output.append((url, True))
                    break
                if f.tell() >= table_end:
                    f.seek(header_size)
                if f.tell() == start:
                    # Wrapped around the whole table without finding it.
                    output.append((url, False))
                    break
    return output
|
||||
Loading…
Reference in a new issue