Clean up chromagnon code to only what we're using — the rest probably wasn't updated completely anyway.

This commit is contained in:
Jim Miller 2021-01-21 17:57:28 -06:00
parent 17cd3f3d04
commit 9819e0b214
8 changed files with 34 additions and 787 deletions

View file

@ -41,33 +41,32 @@ import sys
import re
import time
def do_cprofile(func):
    """Decorator that prints the wall-clock duration of every call.

    NOTE(review): if *func* raises, the elapsed delta is never computed,
    so the raw start timestamp is printed instead — behaviour kept as-is.
    """
    def profiled_func(*args, **kwargs):
        elapsed = 0
        try:
            elapsed = time.time()
            retval = func(*args, **kwargs)
            elapsed = time.time() - elapsed
            return retval
        finally:
            print("time:%s"%elapsed)
    return profiled_func
# def do_cprofile(func):
# def profiled_func(*args, **kwargs):
# t=0
# try:
# t = time.time()
# result = func(*args, **kwargs)
# t = time.time() - t
# return result
# finally:
# print("time:%s"%t)
# return profiled_func
# Pick a brotli decompressor: prefer the native C module, fall back to the
# bundled pure-python implementation.
# NOTE(review): the bare `except:` also hides non-ImportError failures from
# the native module — consider narrowing to `except ImportError`.
try:
    from brotli import decompress
    @do_cprofile
    # @do_cprofile
    def brotli_decompress(inbuf):
        # Native (C) brotli: fast path.
        return decompress(inbuf)
except:
    # Calibre doesn't include brotli, so use packaged brotlipython
    # which is waaaay slower, but pure python.
    from brotlipython import brotlidec
    @do_cprofile
    # @do_cprofile
    def brotli_decompress(inbuf):
        # wants the output, too, but returns it
        return brotlidec(inbuf,[])
from . import csvOutput
from . import SuperFastHash
from .cacheAddress import CacheAddress
@ -76,47 +75,23 @@ from .cacheData import CacheData
from .cacheEntry import CacheEntry
from six.moves import range
class ChromeCache(object):
def __init__(self,path):
self.path = os.path.abspath(path)
self.cacheBlock = CacheBlock(os.path.join(path, "index"))
def parse(path, urls=None):
"""
Reads the whole cache and store the collected data in a table
or find out if the given list of urls is in the cache. If yes it
return a list of the corresponding entries.
"""
# Verifying that the path end with / (What happen on windows?)
path = os.path.abspath(path)
cacheBlock = CacheBlock(os.path.join(path, "index"))
# Checking type
if self.cacheBlock.type != CacheBlock.INDEX:
raise Exception("Invalid Index File")
# Checking type
if cacheBlock.type != CacheBlock.INDEX:
raise Exception("Invalid Index File")
index = open(os.path.join(path, "index"), 'rb')
# Skipping Header
index.seek(92*4)
cache = []
# If no url is specified, parse the whole cache
if urls == None:
for key in range(cacheBlock.tableSize):
raw = struct.unpack('I', index.read(4))[0]
if raw != 0:
entry = CacheEntry(CacheAddress(raw, path=path))
# Checking if there is a next item in the bucket because
# such entries are not stored in the Index File so they will
# be ignored during iterative lookup in the hash table
while entry.next != 0:
cache.append(entry)
entry = CacheEntry(CacheAddress(entry.next, path=path))
cache.append(entry)
else:
# Find the entry for each url
for url in urls:
url = bytes(url,'utf8')
# Compute the key and seeking to it
hash = SuperFastHash.superFastHash(url)
key = hash & (cacheBlock.tableSize - 1)
def get_cache_entry(self,url):
url = bytes(url,'utf8')
# Compute the key and seeking to it
# print("url:%s"%url)
hash = SuperFastHash.superFastHash(url)
# print("superFastHash:%s"%hash)
key = hash & (self.cacheBlock.tableSize - 1)
with open(os.path.join(self.path, "index"), 'rb') as index:
index.seek(92*4 + key*4)
addr = struct.unpack('I', index.read(4))[0]
@ -126,29 +101,16 @@ def parse(path, urls=None):
# Follow the chained list in the bucket
else:
entry = CacheEntry(CacheAddress(addr, path=path))
entry = CacheEntry(CacheAddress(addr, path=self.path))
while entry.hash != hash and entry.next != 0:
entry = CacheEntry(CacheAddress(entry.next, path=path))
entry = CacheEntry(CacheAddress(entry.next, path=self.path))
if entry.hash == hash:
cache.append(entry)
index.close()
return cache
class ChromeCache(object):
def __init__(self,path):
self.cache = parse(path)
self.hash_cache = {}
# t = time.time()
for entry in self.cache:
key = entry.keyToStr()
if 'fanfiction.net' not in key:
continue
self.hash_cache[key] = entry
# print("======:%s"%(time.time()-t))
return entry
def get_cached_file(self,url):
if url in self.hash_cache:
entry = self.hash_cache[url]
entry = self.get_cache_entry(url)
if entry:
# entry = self.hash_cache[url]
for i in range(len(entry.data)):
if entry.data[i].type == CacheData.UNKNOWN:
# Extracting data into a file
@ -163,157 +125,3 @@ class ChromeCache(object):
data = brotli_decompress(data)
return data
return None
def exportToHTML(cache, outpath):
    """
    Export the cache in html.

    cache:   list of CacheEntry objects to dump
    outpath: destination directory (created if missing); writes index.html
             plus one page per entry, named after the entry hash.
    """
    # Checking that the directory exists and is writable
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    outpath = os.path.abspath(outpath)

    index = open(os.path.join(outpath, "index.html"), 'w')
    index.write("<UL>")

    for entry in cache:
        # Adding a link in the index (long keys truncated for display)
        if entry.keyLength > 100:
            entry_name = entry.keyToStr()[:100] + "..."
        else:
            entry_name = entry.keyToStr()
        index.write('<LI><a href="%08x.html">%s</a></LI>'%(entry.hash, entry_name))

        # We handle the special case where entry_name ends with a slash
        page_basename = entry_name.split('/')[-2] if entry_name.endswith('/') else entry_name.split('/')[-1]

        # Creating the entry page
        page = open(os.path.join(outpath, "%08x.html"%entry.hash), 'w')
        page.write("""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
</head>
<body>""")

        # Details of the entry
        page.write("<b>Hash</b>: 0x%08x<br />"%entry.hash)
        page.write("<b>Usage Counter</b>: %d<br />"%entry.usageCounter)
        page.write("<b>Reuse Counter</b>: %d<br />"%entry.reuseCounter)
        page.write("<b>Creation Time</b>: %s<br />"%entry.creationTime)
        page.write("<b>Key</b>: %s<br>"%entry.keyToStr())
        page.write("<b>State</b>: %s<br>"%CacheEntry.STATE[entry.state])
        page.write("<hr>")

        ## entry.data normally 2 or 1
        ## 2 for headers and data, 1 for headers only.
        if len(entry.data) == 0:
            page.write("No data associated with this entry :-(")
        for i in range(len(entry.data)):
            if entry.data[i].type == CacheData.UNKNOWN:
                # Extracting data into a file
                name = hex(entry.hash) + "_" + str(i)
                entry.data[i].save(os.path.join(outpath, name))
                if entry.httpHeader != None and \
                   b'content-encoding' in entry.httpHeader.headers:
                    if entry.httpHeader.headers[b'content-encoding'] == b"gzip":
                        # XXX Highly inefficient !!!!!
                        try:
                            input = gzip.open(os.path.join(outpath, name), 'rb')
                            output = open(os.path.join(outpath, name + "u"), 'wb')
                            output.write(input.read())
                            input.close()
                            output.close()
                            page.write('<a href="%su">%s</a>'%(name, page_basename))
                        except IOError:
                            page.write("Something wrong happened while unzipping")
                    elif entry.httpHeader.headers[b'content-encoding'] == b"br":
                        try:
                            with open(os.path.join(outpath, name), 'rb') as input:
                                with open(os.path.join(outpath, name + "u"), 'wb') as output:
                                    # BUGFIX: `brotli` is never imported at module
                                    # level (only `decompress`/`brotli_decompress`
                                    # are), so `brotli.decompress` raised NameError.
                                    output.write(brotli_decompress(input.read()))
                            page.write('<a href="%su">%s</a>'%(name, page_basename))
                        except IOError:
                            page.write("Something wrong happened while unzipping")
                else:
                    page.write('<a href="%s">%s</a>'%(name ,
                               entry.keyToStr().split('/')[-1]))
                # If it is a picture, display it
                if entry.httpHeader != None:
                    if b'content-type' in entry.httpHeader.headers and\
                       b"image" in entry.httpHeader.headers[b'content-type']:
                        page.write('<br /><img src="%s">'%(name))
            # HTTP Header
            else:
                page.write("<u>HTTP Header</u><br />")
                for key, value in entry.data[i].headers.items():
                    page.write("<b>%s</b>: %s<br />"%(key, value))
            page.write("<hr>")
        page.write("</body></html>")
        page.close()
    index.write("</UL>")
    index.close()
def exportTol2t(cache):
    """
    Export the cache in CSV log2timeline compliant format.

    cache: list of CacheEntry objects; rows are handed to
           csvOutput.csvOutput, which prints them to stdout.
    """
    output = []
    # log2timeline column header
    output.append(["date",
                   "time",
                   "timezone",
                   "MACB",
                   "source",
                   "sourcetype",
                   "type",
                   "user",
                   "host",
                   "short",
                   "desc",
                   "version",
                   "filename",
                   "inode",
                   "notes",
                   "format",
                   "extra"])
    for entry in cache:
        date = entry.creationTime.date().strftime("%m/%d/%Y")
        time = entry.creationTime.time()
        # TODO get timezone
        timezone = 0
        short = entry.keyToStr()
        descr = "Hash: 0x%08x" % entry.hash
        descr += " Usage Counter: %d" % entry.usageCounter
        if entry.httpHeader != None:
            # BUGFIX: header keys are bytes elsewhere in this file
            # (cf. b'content-encoding' / b'content-type'); the str key
            # never matched, so the MIME type was silently dropped.
            if b'content-type' in entry.httpHeader.headers:
                descr += " MIME: %s" % entry.httpHeader.headers[b'content-type']
        output.append([date,
                       time,
                       timezone,
                       "MACB",
                       "WEBCACHE",
                       "Chrome Cache",
                       "Cache Entry",
                       "-",
                       "-",
                       short,
                       descr,
                       "2",
                       "-",
                       "-",
                       "-",
                       "-",
                       "-",
                       ])
    csvOutput.csvOutput(output)

View file

@ -1,45 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Classical Output Module
"""
from __future__ import absolute_import
import sys
def classicalOutput(queryResult, separator="\t"):
    """
    Display the data separated by the specified separator.

    queryResult: iterable of rows, each row an iterable of cells
    separator:   string written after every cell (default: tab)
    """
    out = sys.stdout
    for line in queryResult:
        for element in line:
            # str() so non-string cells (ints, datetimes, ...) don't raise
            out.write(str(element))
            out.write(separator)
        out.write('\n')

View file

@ -1,49 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Column Output Module
"""
from __future__ import print_function
from six.moves import range
def columnOutput(queryResult, separator=' '):
    """
    Display the data in columns.

    queryResult: list of equal-length rows; cells are rendered with str()
                 and left-justified to the widest cell of their column
    separator:   string printed between columns
    """
    if len(queryResult) == 0:
        return
    # Width of each column = widest cell (as text) in that column
    widths = [max(len(str(line[i])) for line in queryResult)
              for i in range(len(queryResult[0]))]
    # BUGFIX: build the format via separator.join; the previous
    # [:-len(separator)] slice produced an empty format string when
    # separator was "".
    fmt = separator.join("%%-%ds" % w for w in widths)
    for line in queryResult:
        print(fmt % tuple(line))

View file

@ -1,44 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
CSV Output Module
"""
from __future__ import absolute_import
import csv
import sys
def csvOutput(queryResult, separator=',', quote='"'):
    """
    Display the data according to csv format.

    queryResult: iterable of rows written to stdout
    separator:   field delimiter
    quote:       quote character (minimal quoting)
    """
    writer = csv.writer(sys.stdout,
                        delimiter=separator,
                        quotechar=quote,
                        quoting=csv.QUOTE_MINIMAL)
    writer.writerows(queryResult)

View file

@ -1,106 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome Download Table History File
Its a SQLite3 table
"""
from __future__ import absolute_import
from __future__ import print_function
import datetime
import sqlite3
import sys
import six
def parse(filename, urlLength):
    """
    filename: path to the history file (a SQLite3 database)
    urlLength: maximum url length to display

    Returns a list of DownloadEntry built from the `downloads` table.
    """
    # Connecting to the DB
    try:
        history = sqlite3.connect(filename)
    except sqlite3.Error as error:
        print("==> Error while opening the history file !")
        # BUGFIX: sqlite3.Error has no .message attribute on Python 3;
        # printing the exception itself shows the same text.
        print("==> Details :", error)
        sys.exit("==> Exiting...")
    # Retrieving all useful data
    result = history.execute("SELECT id, \
                              full_path, \
                              url, \
                              start_time, \
                              received_bytes, \
                              total_bytes, \
                              state \
                              FROM downloads;")
    output = []
    for line in result:
        output.append(DownloadEntry(line, urlLength))
    return output
class DownloadEntry(object):
    """Object to store download entries"""

    # Short column code -> attribute name, used by columnToStr()
    COLUMN_STR = {'st': "startTime",
                  'p': "path",
                  'u': "url",
                  'rb': "receivedBytes",
                  'tb': "totalBytes",
                  'pt': "percentReceived",
                  's': "state"}

    # Download state labels, indexed by the integer stored in the DB
    STATE_STR = ["In Progress",
                 "Complete",
                 "Cancelled",
                 "Removing",
                 "Interrupted"]

    def __init__(self, item, urlLength):
        """Parse one raw `downloads` table row into attributes."""
        self.path = item[1]
        url = item[2]
        # Truncate over-long urls for display (urlLength <= 0 disables)
        if urlLength > 0 and len(url) > urlLength:
            url = url[0:urlLength - 3] + "..."
        self.url = url
        # Chrome stores times as microseconds since 1601-01-01
        epoch = datetime.datetime(1601, 1, 1)
        self.startTime = epoch + datetime.timedelta(microseconds=item[3])
        self.receivedBytes = item[4]
        self.totalBytes = item[5]
        self.state = DownloadEntry.STATE_STR[item[6]]
        if int(item[5]) == 0:
            self.percentReceived = "0%"
        else:
            self.percentReceived = "%d%%" % \
                int(float(item[4]) / float(item[5]) * 100)

    def columnToStr(self, column):
        """Returns column content specified by argument"""
        attr = DownloadEntry.COLUMN_STR[column]
        return six.text_type(getattr(self, attr))

View file

@ -1,178 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome History File
Its a SQLite3 file
"""
from __future__ import absolute_import
from __future__ import print_function
import datetime
import re
import sqlite3
import sys
from . import cacheParse
import six
def parse(filename, start, end, checkCache, cachePath, urlLength):
    """
    filename: path to the history file (a SQLite3 database)
    start: beginning of the time window (datetime)
    end: end of the time window (datetime)
    checkCache: check if each page in the history is in the cache
    cachePath: path to cache directory
    urlLength: maximum url length to display

    Returns a list of HistoryEntry for visits inside [start, end].
    """
    # Connecting to the DB
    try:
        history = sqlite3.connect(filename)
    except sqlite3.Error as error:
        print("==> Error while opening the history file !")
        # BUGFIX: sqlite3.Error has no .message attribute on Python 3;
        # printing the exception itself shows the same text.
        print("==> Details :", error)
        sys.exit("==> Exiting...")
    # Chrome timestamps are microseconds since 1601-01-01
    reference = datetime.datetime(1601, 1, 1)
    # Retrieving all useful data
    result = history.execute("SELECT visits.visit_time, \
                              visits.from_visit, \
                              visits.transition, \
                              urls.url, \
                              urls.title, \
                              urls.visit_count, \
                              urls.typed_count, \
                              urls.last_visit_time \
                              FROM urls,visits \
                              WHERE urls.id=visits.url\
                              AND visits.visit_time>%d\
                              AND visits.visit_time<%d\
                              ORDER BY visits.visit_time;"%\
                              (int((start-reference).total_seconds()*1000000),\
                               int((end-reference).total_seconds()*1000000)))
    # Parsing cache
    cache = None
    if checkCache:
        cache = cacheParse.parse(cachePath)
    output = []
    for line in result:
        output.append(HistoryEntry(line, cache, urlLength))
    return output
class Transition():
    """Object representing transition between history pages"""

    # Core transition types, indexed by the low byte of the raw value
    CORE_STRING = ["Link",
                   "Typed",
                   "Auto Bookmark",
                   "Auto Subframe",
                   "Manual Subframe",
                   "Generated",
                   "Start Page",
                   "Form Submit",
                   "Reload",
                   "Keyword",
                   # BUGFIX: was misspelled "Keywork Generated"
                   # (Chrome's PAGE_TRANSITION_KEYWORD_GENERATED)
                   "Keyword Generated"]

    # (mask, description) pairs for the qualifier bits in the high bytes
    QUALIFIER_STRING = [(0x01000000, "Forward or Back Button"),
                        (0x02000000, "Address Bar"),
                        (0x04000000, "Home Page"),
                        (0x10000000, "Beginning of Chain"),
                        (0x20000000, "End of Chain"),
                        (0x40000000, "Client Redirection"),
                        (0x80000000, "Server Redirection")]

    def __init__(self, transition):
        """
        Parsing the transition according to
        content/common/page_transition_types.h
        """
        self.core = transition & 0xFF
        self.qualifier = transition & 0xFFFFFF00

    def __str__(self):
        string = Transition.CORE_STRING[self.core]
        for mask, description in Transition.QUALIFIER_STRING:
            if self.qualifier & mask != 0:
                string += ", %s"%description
        return string
class HistoryEntry(object):
    """Object to store database entries"""

    # Short column code -> attribute name, used by columnToStr()
    COLUMN_STR = {'vt': "visitTime",
                  'fv': "fromVisit",
                  'tr': "transition",
                  'u': "url",
                  'tl': "title",
                  'vc': "visitCount",
                  'tc': "typedCount",
                  'lv': "lastVisitTime",
                  'cc': "inCache"}

    def __init__(self, item, cache, urlLength):
        """Parse one raw urls/visits join row into attributes."""
        # Chrome stores times as microseconds since 1601-01-01
        epoch = datetime.datetime(1601, 1, 1)
        self.visitTime = epoch + datetime.timedelta(microseconds=item[0])
        self.fromVisit = item[1]
        self.transition = Transition(item[2])
        url = item[3]
        # Truncate over-long urls for display (urlLength <= 0 disables)
        if urlLength > 0 and len(url) > urlLength:
            url = url[0:urlLength - 3] + "..."
        self.url = url
        self.title = item[4]
        self.visitCount = item[5]
        self.typedCount = item[6]
        self.lastVisitTime = epoch + datetime.timedelta(microseconds=item[7])
        # Searching in the cache if there is a copy of the page
        # TODO use a hash table to search instead of heavy exhaustive search
        self.inCache = False
        if cache is not None:
            for cached in cache:
                if cached.keyToStr() == self.url:
                    self.inCache = True
                    break

    def toStr(self):
        """Return all fields as a list of text values."""
        return [six.text_type(self.visitTime),
                six.text_type(self.fromVisit),
                six.text_type(self.transition),
                six.text_type(self.url),
                six.text_type(self.title),
                six.text_type(self.visitCount),
                six.text_type(self.typedCount),
                six.text_type(self.lastVisitTime)]

    def columnToStr(self, column):
        """Returns column content specified by argument"""
        return six.text_type(getattr(self, HistoryEntry.COLUMN_STR[column]))

View file

@ -1,42 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
JSON Output Module
"""
from __future__ import absolute_import
from __future__ import print_function
import json
def jsonOutput(queryResult, separator=''):
    """
    Display the data separated in JSON.

    separator is accepted for interface parity with the other output
    modules but is unused here.
    """
    encoder = json.JSONEncoder()
    print(encoder.encode(queryResult))

View file

@ -1,97 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome Visited Links
Reverse engineered from
chrome/common/visitedlink_common.*
chrome/browser/visitedlink/visitedlink_*
"""
from __future__ import absolute_import
import md5
import struct
import sys
from six.moves import range
VISITED_LINKS_MAGIC = 0x6b6e4c56

def isVisited(path, urls):
    """
    Return the list of urls given in parameter with a boolean information
    about its presence in the given visited links file.

    path: path to Chrome's "Visited Links" hash-table file
    urls: iterable of urls (str or bytes)
    Returns a list of (url, bool) tuples in input order.
    """
    # BUGFIX: ported from Python 2 — the removed `md5` module is replaced
    # by hashlib, the salt is read as raw bytes (struct 'c' returns bytes
    # on py3, so the old str concatenation raised TypeError), and the
    # shift uses integer division (i/2 is a float on py3).
    output = []
    with open(path, 'rb') as f:
        # Checking file type
        magic = struct.unpack('I', f.read(4))[0]
        if magic != VISITED_LINKS_MAGIC:
            raise Exception("Invalid file")
        # Reading header values
        version = struct.unpack('I', f.read(4))[0]
        length = struct.unpack('I', f.read(4))[0]
        usedItems = struct.unpack('I', f.read(4))[0]
        # Reading the 8-byte salt
        salt = f.read(8)
        for url in urls:
            url_bytes = url.encode('utf-8') if isinstance(url, str) else url
            digest = hashlib.md5(salt + url_bytes).hexdigest()
            # Inverting the result (first 8 digest bytes, little-endian)
            # Why Chrome MD5 computation gives a reverse digest ?
            fingerprint = 0
            for i in range(0, 16, 2):
                fingerprint += int(digest[i:i + 2], 16) << (i // 2) * 8
            key = fingerprint % length
            # The hash table uses open addressing
            # NOTE(review): stored fingerprints are read as signed ('q')
            # while the computed one is unsigned — confirm against
            # Chrome's visitedlink format.
            f.seek(key * 8 + 24, 0)
            while True:
                finger = struct.unpack('q', f.read(8))[0]
                if finger == 0:
                    output.append((url, False))
                    break
                if finger == fingerprint:
                    output.append((url, True))
                    break
                # Wrap around at the end of the table
                if f.tell() >= length * 8 + 24:
                    f.seek(24)
                # Came all the way back to the start slot: not present
                if f.tell() == key * 8 + 24:
                    output.append((url, False))
                    break
    return output