diff --git a/fanficfare/chromagnon/cacheParse.py b/fanficfare/chromagnon/cacheParse.py
index 2857ccb5..0d35b495 100644
--- a/fanficfare/chromagnon/cacheParse.py
+++ b/fanficfare/chromagnon/cacheParse.py
@@ -41,33 +41,32 @@ import sys
import re
import time
-def do_cprofile(func):
- def profiled_func(*args, **kwargs):
- t=0
- try:
- t = time.time()
- result = func(*args, **kwargs)
- t = time.time() - t
- return result
- finally:
- print("time:%s"%t)
- return profiled_func
+# def do_cprofile(func):
+# def profiled_func(*args, **kwargs):
+# t=0
+# try:
+# t = time.time()
+# result = func(*args, **kwargs)
+# t = time.time() - t
+# return result
+# finally:
+# print("time:%s"%t)
+# return profiled_func
try:
from brotli import decompress
- @do_cprofile
+ # @do_cprofile
def brotli_decompress(inbuf):
return decompress(inbuf)
except:
# Calibre doesn't include brotli, so use packaged brotlipython
# which is waaaay slower, but pure python.
from brotlipython import brotlidec
- @do_cprofile
+ # @do_cprofile
def brotli_decompress(inbuf):
# wants the output, too, but returns it
return brotlidec(inbuf,[])
-from . import csvOutput
from . import SuperFastHash
from .cacheAddress import CacheAddress
@@ -76,47 +75,23 @@ from .cacheData import CacheData
from .cacheEntry import CacheEntry
from six.moves import range
+class ChromeCache(object):
+ def __init__(self,path):
+ self.path = os.path.abspath(path)
+ self.cacheBlock = CacheBlock(os.path.join(path, "index"))
-def parse(path, urls=None):
- """
- Reads the whole cache and store the collected data in a table
- or find out if the given list of urls is in the cache. If yes it
- return a list of the corresponding entries.
- """
- # Verifying that the path end with / (What happen on windows?)
- path = os.path.abspath(path)
- cacheBlock = CacheBlock(os.path.join(path, "index"))
+ # Checking type
+ if self.cacheBlock.type != CacheBlock.INDEX:
+ raise Exception("Invalid Index File")
- # Checking type
- if cacheBlock.type != CacheBlock.INDEX:
- raise Exception("Invalid Index File")
-
- index = open(os.path.join(path, "index"), 'rb')
-
- # Skipping Header
- index.seek(92*4)
-
- cache = []
- # If no url is specified, parse the whole cache
- if urls == None:
- for key in range(cacheBlock.tableSize):
- raw = struct.unpack('I', index.read(4))[0]
- if raw != 0:
- entry = CacheEntry(CacheAddress(raw, path=path))
- # Checking if there is a next item in the bucket because
- # such entries are not stored in the Index File so they will
- # be ignored during iterative lookup in the hash table
- while entry.next != 0:
- cache.append(entry)
- entry = CacheEntry(CacheAddress(entry.next, path=path))
- cache.append(entry)
- else:
- # Find the entry for each url
- for url in urls:
- url = bytes(url,'utf8')
- # Compute the key and seeking to it
- hash = SuperFastHash.superFastHash(url)
- key = hash & (cacheBlock.tableSize - 1)
+ def get_cache_entry(self,url):
+ url = bytes(url,'utf8')
+ # Compute the key and seeking to it
+ # print("url:%s"%url)
+ hash = SuperFastHash.superFastHash(url)
+ # print("superFastHash:%s"%hash)
+ key = hash & (self.cacheBlock.tableSize - 1)
+ with open(os.path.join(self.path, "index"), 'rb') as index:
index.seek(92*4 + key*4)
addr = struct.unpack('I', index.read(4))[0]
@@ -126,29 +101,16 @@ def parse(path, urls=None):
# Follow the chained list in the bucket
else:
- entry = CacheEntry(CacheAddress(addr, path=path))
+ entry = CacheEntry(CacheAddress(addr, path=self.path))
while entry.hash != hash and entry.next != 0:
- entry = CacheEntry(CacheAddress(entry.next, path=path))
+ entry = CacheEntry(CacheAddress(entry.next, path=self.path))
if entry.hash == hash:
- cache.append(entry)
- index.close()
- return cache
-
-class ChromeCache(object):
- def __init__(self,path):
- self.cache = parse(path)
- self.hash_cache = {}
- # t = time.time()
- for entry in self.cache:
- key = entry.keyToStr()
- if 'fanfiction.net' not in key:
- continue
- self.hash_cache[key] = entry
- # print("======:%s"%(time.time()-t))
+ return entry
def get_cached_file(self,url):
- if url in self.hash_cache:
- entry = self.hash_cache[url]
+ entry = self.get_cache_entry(url)
+ if entry:
+ # entry = self.hash_cache[url]
for i in range(len(entry.data)):
if entry.data[i].type == CacheData.UNKNOWN:
# Extracting data into a file
@@ -163,157 +125,3 @@ class ChromeCache(object):
data = brotli_decompress(data)
return data
return None
-
-def exportToHTML(cache, outpath):
- """
- Export the cache in html
- """
-
- # Checking that the directory exists and is writable
- if not os.path.exists(outpath):
- os.makedirs(outpath)
- outpath = os.path.abspath(outpath)
-
- index = open(os.path.join(outpath,"index.html"), 'w')
- index.write("
")
-
- for entry in cache:
- # Adding a link in the index
- if entry.keyLength > 100:
- entry_name = entry.keyToStr()[:100] + "..."
- else:
- entry_name = entry.keyToStr()
- index.write('- %s
'%(entry.hash, entry_name))
- # We handle the special case where entry_name ends with a slash
- page_basename = entry_name.split('/')[-2] if entry_name.endswith('/') else entry_name.split('/')[-1]
-
- # Creating the entry page
- page = open(os.path.join(outpath,"%08x.html"%entry.hash), 'w')
- page.write("""
-
-
-
-
- """)
-
- # Details of the entry
- page.write("Hash: 0x%08x
"%entry.hash)
- page.write("Usage Counter: %d
"%entry.usageCounter)
- page.write("Reuse Counter: %d
"%entry.reuseCounter)
- page.write("Creation Time: %s
"%entry.creationTime)
- page.write("Key: %s
"%entry.keyToStr())
- page.write("State: %s
"%CacheEntry.STATE[entry.state])
-
- page.write("
")
- ## entry.data normally 2 or 1
- ## 2 for headers and data, 1 for headers only.
- if len(entry.data) == 0:
- page.write("No data associated with this entry :-(")
- for i in range(len(entry.data)):
- if entry.data[i].type == CacheData.UNKNOWN:
- # Extracting data into a file
- name = hex(entry.hash) + "_" + str(i)
- entry.data[i].save(os.path.join(outpath,name))
-
- # print("content-encoding:%s"%entry.httpHeader.headers.get(b'content-encoding',''))
- if entry.httpHeader != None and \
- b'content-encoding' in entry.httpHeader.headers:
- if entry.httpHeader.headers[b'content-encoding'] == b"gzip":
- # XXX Highly inefficient !!!!!
- try:
- input = gzip.open(os.path.join(outpath, name), 'rb')
- output = open(os.path.join(outpath, name + "u"), 'wb')
- output.write(input.read())
- input.close()
- output.close()
- page.write('%s'%(name, page_basename))
- # print("gunzip'ed:%s"%name)
- except IOError:
- page.write("Something wrong happened while unzipping")
- elif entry.httpHeader.headers[b'content-encoding'] == b"br":
- try:
- with open(os.path.join(outpath,name), 'rb') as input:
- with open(os.path.join(outpath,name + "u"), 'wb') as output:
- output.write(brotli.decompress(input.read()))
- page.write('%s'%(name, page_basename))
- # print("unbrotli'ed:%s"%name)
- except IOError:
- page.write("Something wrong happened while unzipping")
- else:
- page.write('%s'%(name ,
- entry.keyToStr().split('/')[-1]))
-
-
- # If it is a picture, display it
- if entry.httpHeader != None:
- if b'content-type' in entry.httpHeader.headers and\
- b"image" in entry.httpHeader.headers[b'content-type']:
- page.write('
'%(name))
- # HTTP Header
- else:
- page.write("HTTP Header
")
- for key, value in entry.data[i].headers.items():
- page.write("%s: %s
"%(key, value))
- page.write("
")
- page.write("")
- page.close()
-
- index.write("
")
- index.close()
-
-def exportTol2t(cache):
- """
- Export the cache in CSV log2timeline compliant format
- """
-
- output = []
- output.append(["date",
- "time",
- "timezone",
- "MACB",
- "source",
- "sourcetype",
- "type",
- "user",
- "host",
- "short",
- "desc",
- "version",
- "filename",
- "inode",
- "notes",
- "format",
- "extra"])
-
- for entry in cache:
- date = entry.creationTime.date().strftime("%m/%d/%Y")
- time = entry.creationTime.time()
- # TODO get timezone
- timezone = 0
- short = entry.keyToStr()
- descr = "Hash: 0x%08x" % entry.hash
- descr += " Usage Counter: %d" % entry.usageCounter
- if entry.httpHeader != None:
- if 'content-type' in entry.httpHeader.headers:
- descr += " MIME: %s" % entry.httpHeader.headers['content-type']
-
- output.append([date,
- time,
- timezone,
- "MACB",
- "WEBCACHE",
- "Chrome Cache",
- "Cache Entry",
- "-",
- "-",
- short,
- descr,
- "2",
- "-",
- "-",
- "-",
- "-",
- "-",
- ])
-
- csvOutput.csvOutput(output)
diff --git a/fanficfare/chromagnon/classicalOutput.py b/fanficfare/chromagnon/classicalOutput.py
deleted file mode 100644
index c8310888..00000000
--- a/fanficfare/chromagnon/classicalOutput.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Classical Output Module
-"""
-
-from __future__ import absolute_import
-import sys
-
-def classicalOutput(queryResult, separator="\t"):
- """
- Display the data separated by the specified separator
- """
-
- for line in queryResult:
- for element in line:
- sys.stdout.write(element)
- sys.stdout.write(separator)
- sys.stdout.write('\n')
diff --git a/fanficfare/chromagnon/columnOutput.py b/fanficfare/chromagnon/columnOutput.py
deleted file mode 100644
index 38d2f1e2..00000000
--- a/fanficfare/chromagnon/columnOutput.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Column Output Module
-"""
-
-from __future__ import print_function
-from six.moves import range
-def columnOutput(queryResult, separator=' '):
- """
- Display the data in columns
- """
- if len(queryResult) == 0:
- return
-
- # Finding width of columns
- size = [max([len(str(line[i])) for line in queryResult])
- for i in range(len(queryResult[0]))]
- # Generating format string (without last separator)
- string = (''.join(["%%-%ds%s" % (x, separator) for x in size]))\
- [:-len(separator)]
- for line in queryResult:
- print(string % tuple(line))
diff --git a/fanficfare/chromagnon/csvOutput.py b/fanficfare/chromagnon/csvOutput.py
deleted file mode 100644
index ecc622de..00000000
--- a/fanficfare/chromagnon/csvOutput.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-CSV Output Module
-"""
-
-from __future__ import absolute_import
-import csv
-import sys
-
-def csvOutput(queryResult, separator=',', quote='"'):
- """
- Display the data according to csv format
- """
- csvWriter = csv.writer(sys.stdout, delimiter=separator, quotechar=quote,
- quoting=csv.QUOTE_MINIMAL)
- for line in queryResult:
- csvWriter.writerow(line)
diff --git a/fanficfare/chromagnon/downloadParse.py b/fanficfare/chromagnon/downloadParse.py
deleted file mode 100644
index a84859b9..00000000
--- a/fanficfare/chromagnon/downloadParse.py
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Parse the Chrome Download Table History File
-Its a SQLite3 table
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-import datetime
-import sqlite3
-import sys
-import six
-
-def parse(filename, urlLength):
- """
- filename: path to the history file
- urlLength: maximum url length to display
- """
-
- # Connecting to the DB
- try:
- history = sqlite3.connect(filename)
- except sqlite3.Error as error:
- print("==> Error while opening the history file !")
- print("==> Details :", error.message)
- sys.exit("==> Exiting...")
-
- # Retrieving all useful data
- result = history.execute("SELECT id, \
- full_path, \
- url, \
- start_time, \
- received_bytes, \
- total_bytes, \
- state \
- FROM downloads;")
-
- output = []
- for line in result:
- output.append(DownloadEntry(line, urlLength))
- return output
-
-class DownloadEntry(object):
- """Object to store download entries"""
- COLUMN_STR = {'st': "startTime",
- 'p': "path",
- 'u': "url",
- 'rb': "receivedBytes",
- 'tb': "totalBytes",
- 'pt': "percentReceived",
- 's': "state"}
- STATE_STR = ["In Progress",
- "Complete",
- "Cancelled",
- "Removing",
- "Interrupted"]
-
- def __init__(self, item, urlLength):
- """Parse raw input"""
- self.path = item[1]
- if len(item[2]) > urlLength and urlLength > 0:
- self.url = item[2][0:urlLength - 3] + "..."
- else:
- self.url = item[2]
- self.startTime = datetime.datetime(1601, 1, 1) + \
- datetime.timedelta(microseconds=\
- item[3])
- self.receivedBytes = item[4]
- self.totalBytes = item[5]
- self.state = DownloadEntry.STATE_STR[item[6]]
- if int(item[5]) == 0:
- self.percentReceived = "0%"
- else:
- self.percentReceived = "%d%%" % \
- int(float(item[4])/float(item[5])*100)
-
- def columnToStr(self, column):
- """Returns column content specified by argument"""
- return six.text_type(self.__getattribute__(DownloadEntry.COLUMN_STR[column]))
diff --git a/fanficfare/chromagnon/historyParse.py b/fanficfare/chromagnon/historyParse.py
deleted file mode 100644
index 9f331452..00000000
--- a/fanficfare/chromagnon/historyParse.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Parse the Chrome History File
-Its a SQLite3 file
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-import datetime
-import re
-import sqlite3
-import sys
-
-from . import cacheParse
-import six
-
-def parse(filename, start, end, checkCache, cachePath, urlLength):
- """
- filename: path to the history file
- start: beginning of the time window
- end: end of the time window
- checkCache: check if each page in the history is in the cache
- cachePath: path to cache directory
- """
-
- # Connecting to the DB
- try:
- history = sqlite3.connect(filename)
- except sqlite3.Error as error:
- print("==> Error while opening the history file !")
- print("==> Details :", error.message)
- sys.exit("==> Exiting...")
-
- reference = datetime.datetime(1601, 1, 1)
-
- # Retrieving all useful data
- result = history.execute("SELECT visits.visit_time, \
- visits.from_visit, \
- visits.transition, \
- urls.url, \
- urls.title, \
- urls.visit_count, \
- urls.typed_count, \
- urls.last_visit_time \
- FROM urls,visits \
- WHERE urls.id=visits.url\
- AND visits.visit_time>%d\
- AND visits.visit_time<%d\
- ORDER BY visits.visit_time;"%\
- (int((start-reference).total_seconds()*1000000),\
- int((end-reference).total_seconds()*1000000)))\
-
- # Parsing cache
- cache = None
- if checkCache:
- cache = cacheParse.parse(cachePath)
-
- output = []
- for line in result:
- output.append(HistoryEntry(line, cache, urlLength))
- return output
-
-class Transition():
- """Object representing transition between history pages"""
-
- CORE_STRING = ["Link",\
- "Typed",\
- "Auto Bookmark",\
- "Auto Subframe",\
- "Manual Subframe",\
- "Generated",\
- "Start Page",\
- "Form Submit",\
- "Reload",\
- "Keyword",\
- "Keywork Generated"]
- QUALIFIER_STRING = [(0x01000000, "Forward or Back Button"),
- (0x02000000, "Address Bar"),
- (0x04000000, "Home Page"),
- (0x10000000, "Beginning of Chain"),
- (0x20000000, "End of Chain"),
- (0x40000000, "Client Redirection"),
- (0x80000000, "Server Redirection")]
-
- def __init__(self, transition):
- """
- Parsing the transtion according to
- content/common/page_transition_types.h
- """
- self.core = transition & 0xFF
- self.qualifier = transition & 0xFFFFFF00
-
- def __str__(self):
- string = Transition.CORE_STRING[self.core]
- for mask, description in Transition.QUALIFIER_STRING:
- if self.qualifier & mask != 0:
- string += ", %s"%description
- return string
-
-class HistoryEntry(object):
- """Object to store database entries"""
- COLUMN_STR = {'vt': "visitTime",
- 'fv': "fromVisit",
- 'tr': "transition",
- 'u': "url",
- 'tl': "title",
- 'vc': "visitCount",
- 'tc': "typedCount",
- 'lv': "lastVisitTime",
- 'cc': "inCache"}
-
- def __init__(self, item, cache, urlLength):
- """Parse raw input"""
- self.visitTime = datetime.datetime(1601, 1, 1) + \
- datetime.timedelta(microseconds=\
- item[0])
- self.fromVisit = item[1]
- self.transition = Transition(item[2])
- if len(item[3]) > urlLength and urlLength > 0:
- self.url = item[3][0:urlLength - 3] + "..."
- else:
- self.url = item[3]
- self.title = item[4]
- self.visitCount = item[5]
- self.typedCount = item[6]
- self.lastVisitTime = datetime.datetime(1601, 1, 1) + \
- datetime.timedelta(microseconds=\
- item[7])
-
- # Searching in the cache if there is a copy of the page
- # TODO use a hash table to search instead of heavy exhaustive search
- self.inCache = False
- if cache != None:
- for item in cache:
- if item.keyToStr() == self.url:
- self.inCache = True
- break
-
- def toStr(self):
- return [six.text_type(self.visitTime),\
- six.text_type(self.fromVisit),\
- six.text_type(self.transition),\
- six.text_type(self.url),\
- six.text_type(self.title),\
- six.text_type(self.visitCount),\
- six.text_type(self.typedCount),\
- six.text_type(self.lastVisitTime)]
-
- def columnToStr(self, column):
- """Returns column content specified by argument"""
- return six.text_type(self.__getattribute__(HistoryEntry.COLUMN_STR[column]))
diff --git a/fanficfare/chromagnon/jsonOutput.py b/fanficfare/chromagnon/jsonOutput.py
deleted file mode 100644
index 95e0531d..00000000
--- a/fanficfare/chromagnon/jsonOutput.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-JSON Output Module
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-import json
-
-def jsonOutput(queryResult, separator=''):
- """
- Display the data separated in JSON
- """
-
- print(json.JSONEncoder().encode(queryResult))
diff --git a/fanficfare/chromagnon/visitedLinks.py b/fanficfare/chromagnon/visitedLinks.py
deleted file mode 100644
index 6fd894b0..00000000
--- a/fanficfare/chromagnon/visitedLinks.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the Chromagon Project nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Parse the Chrome Visited Links
-Reverse engineered from
- chrome/common/visitedlink_common.*
- chrome/browser/visitedlink/visitedlink_*
-"""
-
-from __future__ import absolute_import
-import md5
-import struct
-import sys
-from six.moves import range
-
-VISITED_LINKS_MAGIC = 0x6b6e4c56;
-
-def isVisited(path, urls):
- """
- Return the list of urls given in parameter with a boolean information
- about its presence in the given visited links file
- """
- output = []
-
- f = open(path, 'rb')
-
- # Checking file type
- magic = struct.unpack('I', f.read(4))[0]
- if magic != VISITED_LINKS_MAGIC:
- raise Exception("Invalid file")
-
- # Reading header values
- version = struct.unpack('I', f.read(4))[0]
- length = struct.unpack('I', f.read(4))[0]
- usedItems = struct.unpack('I', f.read(4))[0]
-
- # Reading salt
- salt = ""
- for dummy in range(8):
- salt += struct.unpack('c', f.read(1))[0]
-
- for url in urls:
- fingerprint = md5.new()
- fingerprint.update(salt)
- fingerprint.update(url)
- digest = fingerprint.hexdigest()
-
- # Inverting the result
- # Why Chrome MD5 computation gives a reverse digest ?
- fingerprint = 0
- for i in range(0, 16, 2):
- fingerprint += int(digest[i:i+2], 16) << (i/2)*8
- key = fingerprint % length
-
- # The hash table uses open addressing
- f.seek(key*8 + 24, 0)
- while True:
- finger = struct.unpack('q', f.read(8))[0]
- if finger == 0:
- output.append((url, False))
- break
- if finger == fingerprint:
- output.append((url, True))
- break
- if f.tell() >= length*8 + 24:
- f.seek(24)
- if f.tell() == key*8 + 24:
- output.append((url, False))
- break
- f.close()
- return output