Clean up chromagnon code to keep only what we're using; the rest probably wasn't fully updated anyway.

2026-05-09 05:21:13 +02:00 · 2021-01-21 17:57:28 -06:00 · 2021-01-21 17:57:28 -06:00 · 9819e0b214
commit 9819e0b214
parent 17cd3f3d04
8 changed files with 34 additions and 787 deletions
--- a/fanficfare/chromagnon/cacheParse.py
+++ b/fanficfare/chromagnon/cacheParse.py
@ -41,33 +41,32 @@ import sys
 import re
 import time

-def do_cprofile(func):
-    def profiled_func(*args, **kwargs):
-        t=0
-        try:
-            t = time.time()
-            result = func(*args, **kwargs)
-            t = time.time() - t
-            return result
-        finally:
-            print("time:%s"%t)
-    return profiled_func
+# def do_cprofile(func):
+#     def profiled_func(*args, **kwargs):
+#         t=0
+#         try:
+#             t = time.time()
+#             result = func(*args, **kwargs)
+#             t = time.time() - t
+#             return result
+#         finally:
+#             print("time:%s"%t)
+#     return profiled_func

 try:
    from brotli import decompress
-    @do_cprofile
+    # @do_cprofile
    def brotli_decompress(inbuf):
        return decompress(inbuf)
 except:
    # Calibre doesn't include brotli, so use packaged brotlipython
    # which is waaaay slower, but pure python.
    from brotlipython import brotlidec
-    @do_cprofile
+    # @do_cprofile
    def brotli_decompress(inbuf):
        # wants the output, too, but returns it
        return brotlidec(inbuf,[])

-from . import csvOutput
 from . import SuperFastHash

 from .cacheAddress import CacheAddress
@ -76,47 +75,23 @@ from .cacheData import CacheData
 from .cacheEntry import CacheEntry
 from six.moves import range

+class ChromeCache(object):
+    def __init__(self,path):
+        self.path = os.path.abspath(path)
+        self.cacheBlock = CacheBlock(os.path.join(path, "index"))

-def parse(path, urls=None):
-    """
-    Reads the whole cache and store the collected data in a table
-    or find out if the given list of urls is in the cache. If yes it
-    return a list of the corresponding entries.
-    """
-    # Verifying that the path end with / (What happen on windows?)
-    path = os.path.abspath(path)
-    cacheBlock = CacheBlock(os.path.join(path, "index"))
+        # Checking type
+        if self.cacheBlock.type != CacheBlock.INDEX:
+            raise Exception("Invalid Index File")

-    # Checking type
-    if cacheBlock.type != CacheBlock.INDEX:
-        raise Exception("Invalid Index File")
-
-    index = open(os.path.join(path, "index"), 'rb')
-
-    # Skipping Header
-    index.seek(92*4)
-
-    cache = []
-    # If no url is specified, parse the whole cache
-    if urls == None:
-        for key in range(cacheBlock.tableSize):
-            raw = struct.unpack('I', index.read(4))[0]
-            if raw != 0:
-                entry = CacheEntry(CacheAddress(raw, path=path))
-                # Checking if there is a next item in the bucket because
-                # such entries are not stored in the Index File so they will
-                # be ignored during iterative lookup in the hash table
-                while entry.next != 0:
-                    cache.append(entry)
-                    entry = CacheEntry(CacheAddress(entry.next, path=path))
-                cache.append(entry)
-    else:
-        # Find the entry for each url
-        for url in urls:
-            url = bytes(url,'utf8')
-            # Compute the key and seeking to it
-            hash = SuperFastHash.superFastHash(url)
-            key = hash & (cacheBlock.tableSize - 1)
+    def get_cache_entry(self,url):
+        url = bytes(url,'utf8')
+        # Compute the key and seeking to it
+        # print("url:%s"%url)
+        hash = SuperFastHash.superFastHash(url)
+        # print("superFastHash:%s"%hash)
+        key = hash & (self.cacheBlock.tableSize - 1)
+        with open(os.path.join(self.path, "index"), 'rb') as index:
            index.seek(92*4 + key*4)

            addr = struct.unpack('I', index.read(4))[0]
@ -126,29 +101,16 @@ def parse(path, urls=None):

            # Follow the chained list in the bucket
            else:
-                entry = CacheEntry(CacheAddress(addr, path=path))
+                entry = CacheEntry(CacheAddress(addr, path=self.path))
                while entry.hash != hash and entry.next != 0:
-                    entry = CacheEntry(CacheAddress(entry.next, path=path))
+                    entry = CacheEntry(CacheAddress(entry.next, path=self.path))
                if entry.hash == hash:
-                    cache.append(entry)
-    index.close()
-    return cache
-
-class ChromeCache(object):
-    def __init__(self,path):
-        self.cache = parse(path)
-        self.hash_cache = {}
-        # t = time.time()
-        for entry in self.cache:
-            key = entry.keyToStr()
-            if 'fanfiction.net' not in key:
-                continue
-            self.hash_cache[key] = entry
-        # print("======:%s"%(time.time()-t))
+                    return entry

    def get_cached_file(self,url):
-        if url in self.hash_cache:
-            entry = self.hash_cache[url]
+        entry = self.get_cache_entry(url)
+        if entry:
+            # entry = self.hash_cache[url]
            for i in range(len(entry.data)):
                if entry.data[i].type == CacheData.UNKNOWN:
                    # Extracting data into a file
@ -163,157 +125,3 @@ class ChromeCache(object):
                            data = brotli_decompress(data)
                    return data
        return None
-
-def exportToHTML(cache, outpath):
-    """
-    Export the cache in html
-    """
-
-    # Checking that the directory exists and is writable
-    if not os.path.exists(outpath):
-        os.makedirs(outpath)
-    outpath = os.path.abspath(outpath)
-
-    index = open(os.path.join(outpath,"index.html"), 'w')
-    index.write("<UL>")
-
-    for entry in cache:
-        # Adding a link in the index
-        if entry.keyLength > 100:
-            entry_name = entry.keyToStr()[:100] + "..."
-        else:
-            entry_name = entry.keyToStr()
-        index.write('<LI><a href="%08x.html">%s</a></LI>'%(entry.hash, entry_name))
-        # We handle the special case where entry_name ends with a slash
-        page_basename = entry_name.split('/')[-2] if entry_name.endswith('/') else entry_name.split('/')[-1]
-
-        # Creating the entry page
-        page = open(os.path.join(outpath,"%08x.html"%entry.hash), 'w')
-        page.write("""<!DOCTYPE html>
-                      <html lang="en">
-                      <head>
-                      <meta charset="utf-8">
-                      </head>
-                      <body>""")
-
-        # Details of the entry
-        page.write("<b>Hash</b>: 0x%08x<br />"%entry.hash)
-        page.write("<b>Usage Counter</b>: %d<br />"%entry.usageCounter)
-        page.write("<b>Reuse Counter</b>: %d<br />"%entry.reuseCounter)
-        page.write("<b>Creation Time</b>: %s<br />"%entry.creationTime)
-        page.write("<b>Key</b>: %s<br>"%entry.keyToStr())
-        page.write("<b>State</b>: %s<br>"%CacheEntry.STATE[entry.state])
-
-        page.write("<hr>")
-        ## entry.data normally 2 or 1
-        ## 2 for headers and data, 1 for headers only.
-        if len(entry.data) == 0:
-            page.write("No data associated with this entry :-(")
-        for i in range(len(entry.data)):
-            if entry.data[i].type == CacheData.UNKNOWN:
-                # Extracting data into a file
-                name = hex(entry.hash) + "_" + str(i)
-                entry.data[i].save(os.path.join(outpath,name))
-
-                # print("content-encoding:%s"%entry.httpHeader.headers.get(b'content-encoding',''))
-                if entry.httpHeader != None and \
-                   b'content-encoding' in entry.httpHeader.headers:
-                    if entry.httpHeader.headers[b'content-encoding'] == b"gzip":
-                        # XXX Highly inefficient !!!!!
-                        try:
-                            input = gzip.open(os.path.join(outpath, name), 'rb')
-                            output = open(os.path.join(outpath, name + "u"), 'wb')
-                            output.write(input.read())
-                            input.close()
-                            output.close()
-                            page.write('<a href="%su">%s</a>'%(name, page_basename))
-                            # print("gunzip'ed:%s"%name)
-                        except IOError:
-                            page.write("Something wrong happened while unzipping")
-                    elif entry.httpHeader.headers[b'content-encoding'] == b"br":
-                        try:
-                            with open(os.path.join(outpath,name), 'rb') as input:
-                                with open(os.path.join(outpath,name + "u"), 'wb') as output:
-                                    output.write(brotli.decompress(input.read()))
-                            page.write('<a href="%su">%s</a>'%(name, page_basename))
-                            # print("unbrotli'ed:%s"%name)
-                        except IOError:
-                            page.write("Something wrong happened while unzipping")
-                else:
-                    page.write('<a href="%s">%s</a>'%(name ,
-                               entry.keyToStr().split('/')[-1]))
-
-
-                # If it is a picture, display it
-                if entry.httpHeader != None:
-                    if b'content-type' in entry.httpHeader.headers and\
-                       b"image" in entry.httpHeader.headers[b'content-type']:
-                        page.write('<br /><img src="%s">'%(name))
-            # HTTP Header
-            else:
-                page.write("<u>HTTP Header</u><br />")
-                for key, value in entry.data[i].headers.items():
-                    page.write("<b>%s</b>: %s<br />"%(key, value))
-            page.write("<hr>")
-        page.write("</body></html>")
-        page.close()
-
-    index.write("</UL>")
-    index.close()
-
-def exportTol2t(cache):
-    """
-    Export the cache in CSV log2timeline compliant format
-    """
-
-    output = []
-    output.append(["date",
-                   "time",
-                   "timezone",
-                   "MACB",
-                   "source",
-                   "sourcetype",
-                   "type",
-                   "user",
-                   "host",
-                   "short",
-                   "desc",
-                   "version",
-                   "filename",
-                   "inode",
-                   "notes",
-                   "format",
-                   "extra"])
-
-    for entry in cache:
-        date = entry.creationTime.date().strftime("%m/%d/%Y")
-        time = entry.creationTime.time()
-        # TODO get timezone
-        timezone = 0
-        short = entry.keyToStr()
-        descr = "Hash: 0x%08x" % entry.hash
-        descr += " Usage Counter: %d" % entry.usageCounter
-        if entry.httpHeader != None:
-            if 'content-type' in entry.httpHeader.headers:
-                descr += " MIME: %s" % entry.httpHeader.headers['content-type']
-
-        output.append([date,
-                       time,
-                       timezone,
-                       "MACB",
-                       "WEBCACHE",
-                       "Chrome Cache",
-                       "Cache Entry",
-                       "-",
-                       "-",
-                       short,
-                       descr,
-                       "2",
-                       "-",
-                       "-",
-                       "-",
-                       "-",
-                       "-",
-                       ])
-
-    csvOutput.csvOutput(output)
--- a/fanficfare/chromagnon/classicalOutput.py
+++ b/fanficfare/chromagnon/classicalOutput.py
@ -1,45 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Classical Output Module
-"""
-
-from __future__ import absolute_import
-import sys
-
-def classicalOutput(queryResult, separator="\t"):
-    """
-    Display the data separated by the specified separator
-    """
-
-    for line in queryResult:
-        for element in line:
-            sys.stdout.write(element)
-            sys.stdout.write(separator)
-        sys.stdout.write('\n')
--- a/fanficfare/chromagnon/columnOutput.py
+++ b/fanficfare/chromagnon/columnOutput.py
@ -1,49 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Column Output Module
-"""
-
-from __future__ import print_function
-from six.moves import range
-def columnOutput(queryResult, separator=' '):
-    """
-    Display the data in columns
-    """
-    if len(queryResult) == 0:
-        return
-
-    # Finding width of columns
-    size = [max([len(str(line[i])) for line in queryResult])
-            for i in range(len(queryResult[0]))]
-    # Generating format string (without last separator)
-    string = (''.join(["%%-%ds%s" % (x, separator) for x in size]))\
-                 [:-len(separator)]
-    for line in queryResult:
-        print(string % tuple(line))
--- a/fanficfare/chromagnon/csvOutput.py
+++ b/fanficfare/chromagnon/csvOutput.py
@ -1,44 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-CSV Output Module
-"""
-
-from __future__ import absolute_import
-import csv
-import sys
-
-def csvOutput(queryResult, separator=',', quote='"'):
-    """
-    Display the data according to csv format
-    """
-    csvWriter = csv.writer(sys.stdout, delimiter=separator, quotechar=quote,
-                           quoting=csv.QUOTE_MINIMAL)
-    for line in queryResult:
-        csvWriter.writerow(line)
--- a/fanficfare/chromagnon/downloadParse.py
+++ b/fanficfare/chromagnon/downloadParse.py
@ -1,106 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Parse the Chrome Download Table History File
-Its a SQLite3 table
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-import datetime
-import sqlite3
-import sys
-import six
-
-def parse(filename, urlLength):
-    """
-    filename: path to the history file
-    urlLength: maximum url length to display
-    """
-
-    # Connecting to the DB
-    try:
-        history = sqlite3.connect(filename)
-    except sqlite3.Error as error:
-        print("==> Error while opening the history file !")
-        print("==> Details :", error.message)
-        sys.exit("==> Exiting...")
-
-    # Retrieving all useful data
-    result = history.execute("SELECT id, \
-                              full_path, \
-                              url, \
-                              start_time, \
-                              received_bytes, \
-                              total_bytes, \
-                              state \
-                              FROM downloads;")
-
-    output = []
-    for line in result:
-        output.append(DownloadEntry(line, urlLength))
-    return output
-
-class DownloadEntry(object):
-    """Object to store download entries"""
-    COLUMN_STR = {'st': "startTime",
-                  'p': "path",
-                  'u': "url",
-                  'rb': "receivedBytes",
-                  'tb': "totalBytes",
-                  'pt': "percentReceived",
-                  's': "state"}
-    STATE_STR = ["In Progress",
-                 "Complete",
-                 "Cancelled",
-                 "Removing",
-                 "Interrupted"]
-
-    def __init__(self, item, urlLength):
-        """Parse raw input"""
-        self.path = item[1]
-        if len(item[2]) > urlLength and urlLength > 0:
-            self.url = item[2][0:urlLength - 3] + "..."
-        else:
-            self.url = item[2]
-        self.startTime = datetime.datetime(1601, 1, 1) + \
-                         datetime.timedelta(microseconds=\
-                         item[3])
-        self.receivedBytes = item[4]
-        self.totalBytes = item[5]
-        self.state = DownloadEntry.STATE_STR[item[6]]
-        if int(item[5]) == 0:
-            self.percentReceived = "0%"
-        else:
-            self.percentReceived = "%d%%" % \
-                                   int(float(item[4])/float(item[5])*100)
-
-    def columnToStr(self, column):
-        """Returns column content specified by argument"""
-        return six.text_type(self.__getattribute__(DownloadEntry.COLUMN_STR[column]))
--- a/fanficfare/chromagnon/historyParse.py
+++ b/fanficfare/chromagnon/historyParse.py
@ -1,178 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Parse the Chrome History File
-Its a SQLite3 file
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-import datetime
-import re
-import sqlite3
-import sys
-
-from . import cacheParse
-import six
-
-def parse(filename, start, end, checkCache, cachePath, urlLength):
-    """
-    filename: path to the history file
-    start: beginning of the time window
-    end: end of the time window
-    checkCache: check if each page in the history is in the cache
-    cachePath: path to cache directory
-    """
-
-    # Connecting to the DB
-    try:
-        history = sqlite3.connect(filename)
-    except sqlite3.Error as error:
-        print("==> Error while opening the history file !")
-        print("==> Details :", error.message)
-        sys.exit("==> Exiting...")
-
-    reference = datetime.datetime(1601, 1, 1)
-
-    # Retrieving all useful data
-    result = history.execute("SELECT visits.visit_time, \
-                               visits.from_visit, \
-                               visits.transition, \
-                               urls.url, \
-                               urls.title, \
-                               urls.visit_count, \
-                               urls.typed_count, \
-                               urls.last_visit_time \
-                               FROM urls,visits \
-                               WHERE urls.id=visits.url\
-                               AND visits.visit_time>%d\
-                               AND visits.visit_time<%d\
-                               ORDER BY visits.visit_time;"%\
-                               (int((start-reference).total_seconds()*1000000),\
-                               int((end-reference).total_seconds()*1000000)))\
-
-    # Parsing cache
-    cache = None
-    if checkCache:
-        cache = cacheParse.parse(cachePath)
-
-    output = []
-    for line in result:
-        output.append(HistoryEntry(line, cache, urlLength))
-    return output
-
-class Transition():
-    """Object representing transition between history pages"""
-
-    CORE_STRING = ["Link",\
-                   "Typed",\
-                   "Auto Bookmark",\
-                   "Auto Subframe",\
-                   "Manual Subframe",\
-                   "Generated",\
-                   "Start Page",\
-                   "Form Submit",\
-                   "Reload",\
-                   "Keyword",\
-                   "Keywork Generated"]
-    QUALIFIER_STRING = [(0x01000000, "Forward or Back Button"),
-                        (0x02000000, "Address Bar"),
-                        (0x04000000, "Home Page"),
-                        (0x10000000, "Beginning of Chain"),
-                        (0x20000000, "End of Chain"),
-                        (0x40000000, "Client Redirection"),
-                        (0x80000000, "Server Redirection")]
-
-    def __init__(self, transition):
-        """
-        Parsing the transtion according to
-        content/common/page_transition_types.h
-        """
-        self.core = transition & 0xFF
-        self.qualifier = transition & 0xFFFFFF00
-
-    def __str__(self):
-        string = Transition.CORE_STRING[self.core]
-        for mask, description in Transition.QUALIFIER_STRING:
-            if self.qualifier & mask != 0:
-                string += ", %s"%description
-        return string
-
-class HistoryEntry(object):
-    """Object to store database entries"""
-    COLUMN_STR = {'vt': "visitTime",
-                  'fv': "fromVisit",
-                  'tr': "transition",
-                  'u':  "url",
-                  'tl': "title",
-                  'vc': "visitCount",
-                  'tc': "typedCount",
-                  'lv': "lastVisitTime",
-                  'cc': "inCache"}
-
-    def __init__(self, item, cache, urlLength):
-        """Parse raw input"""
-        self.visitTime = datetime.datetime(1601, 1, 1) + \
-                         datetime.timedelta(microseconds=\
-                         item[0])
-        self.fromVisit = item[1]
-        self.transition = Transition(item[2])
-        if len(item[3]) > urlLength and urlLength > 0:
-            self.url = item[3][0:urlLength - 3] + "..."
-        else:
-            self.url = item[3]
-        self.title = item[4]
-        self.visitCount = item[5]
-        self.typedCount = item[6]
-        self.lastVisitTime = datetime.datetime(1601, 1, 1) + \
-                             datetime.timedelta(microseconds=\
-                             item[7])
-
-        # Searching in the cache if there is a copy of the page
-        # TODO use a hash table to search instead of heavy exhaustive search
-        self.inCache = False
-        if cache != None:
-            for item in cache:
-                if item.keyToStr() == self.url:
-                    self.inCache = True
-                    break
-
-    def toStr(self):
-        return [six.text_type(self.visitTime),\
-                six.text_type(self.fromVisit),\
-                six.text_type(self.transition),\
-                six.text_type(self.url),\
-                six.text_type(self.title),\
-                six.text_type(self.visitCount),\
-                six.text_type(self.typedCount),\
-                six.text_type(self.lastVisitTime)]
-
-    def columnToStr(self, column):
-        """Returns column content specified by argument"""
-        return six.text_type(self.__getattribute__(HistoryEntry.COLUMN_STR[column]))
--- a/fanficfare/chromagnon/jsonOutput.py
+++ b/fanficfare/chromagnon/jsonOutput.py
@ -1,42 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-JSON Output Module
-"""
-
-from __future__ import absolute_import
-from __future__ import print_function
-import json
-
-def jsonOutput(queryResult, separator=''):
-    """
-    Display the data separated in JSON
-    """
-
-    print(json.JSONEncoder().encode(queryResult))
--- a/fanficfare/chromagnon/visitedLinks.py
+++ b/fanficfare/chromagnon/visitedLinks.py
@ -1,97 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the Chromagon Project nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Parse the Chrome Visited Links
-Reverse engineered from
-  chrome/common/visitedlink_common.*
-  chrome/browser/visitedlink/visitedlink_*
-"""
-
-from __future__ import absolute_import
-import md5
-import struct
-import sys
-from six.moves import range
-
-VISITED_LINKS_MAGIC = 0x6b6e4c56;
-
-def isVisited(path, urls):
-    """
-    Return the list of urls given in parameter with a boolean information
-    about its presence in the given visited links file
-    """
-    output = []
-
-    f = open(path, 'rb')
-
-    # Checking file type
-    magic = struct.unpack('I', f.read(4))[0]
-    if magic != VISITED_LINKS_MAGIC:
-        raise Exception("Invalid file")
-
-    # Reading header values
-    version = struct.unpack('I', f.read(4))[0]
-    length = struct.unpack('I', f.read(4))[0]
-    usedItems = struct.unpack('I', f.read(4))[0]
-
-    # Reading salt
-    salt = ""
-    for dummy in range(8):
-        salt += struct.unpack('c', f.read(1))[0]
-
-    for url in urls:
-        fingerprint = md5.new()
-        fingerprint.update(salt)
-        fingerprint.update(url)
-        digest = fingerprint.hexdigest()
-
-        # Inverting the result
-        # Why Chrome MD5 computation gives a reverse digest ?
-        fingerprint = 0
-        for i in range(0, 16, 2):
-            fingerprint += int(digest[i:i+2], 16) << (i/2)*8
-        key = fingerprint % length
-
-        # The hash table uses open addressing
-        f.seek(key*8 + 24, 0)
-        while True:
-            finger = struct.unpack('q', f.read(8))[0]
-            if finger == 0:
-                output.append((url, False))
-                break
-            if finger == fingerprint:
-                output.append((url, True))
-                break
-            if f.tell() >= length*8 + 24:
-                f.seek(24)
-            if f.tell() == key*8 + 24:
-                output.append((url, False))
-                break
-    f.close()
-    return output