CLI only: works with ffnet, and *only* by reading from the Chrome browser cache.

This commit is contained in:
Jim Miller 2021-01-09 16:01:57 -06:00
parent 10a7cf8aa7
commit 95297b58e0
17 changed files with 1526 additions and 8 deletions

View file

@ -25,6 +25,7 @@ import re
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
from ..chromagnon.cacheParse import ChromeCache
from .. import exceptions as exceptions
from ..htmlcleanup import stripHTML
@ -60,6 +61,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## accept m(mobile)url, but use www.
self.origurl = self.origurl.replace("https://m.","https://www.")
self.chromagnon_cache = None
@staticmethod
def getSiteDomain():
return 'www.fanfiction.net'
@ -75,14 +77,50 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
    def _postUrl(self, url,
                 parameters={},
                 headers={},
                 extrasleep=None,
                 usecache=True):
        """
        Disabled in Chrome-cache-only mode: the adapter can only replay
        responses already present in the browser cache, so nothing can be
        POSTed.  Always raises NotImplementedError.
        """
        # NOTE(review): mutable default args; harmless here since the stub
        # never mutates them.
        logger.debug("_postUrl")
        raise NotImplementedError
    def _fetchUrlRawOpened(self, url,
                           parameters=None,
                           extrasleep=None,
                           usecache=True,
                           referer=None):
        """
        Disabled in Chrome-cache-only mode -- no live network fetches.
        Always raises NotImplementedError.
        """
        logger.debug("_fetchUrlRawOpened")
        raise NotImplementedError
    def _fetchUrlOpened(self, url,
                        parameters=None,
                        usecache=True,
                        extrasleep=None,
                        referer=None):
        """
        Disabled in Chrome-cache-only mode -- no live network fetches.
        Always raises NotImplementedError.
        """
        logger.debug("_fetchUrlOpened")
        raise NotImplementedError
    def _fetchUrlRaw(self, url,
                     parameters=None,
                     extrasleep=None,
                     usecache=True,
                     referer=None):
        """
        Disabled in Chrome-cache-only mode -- no live network fetches.
        Always raises NotImplementedError.
        """
        ## This should be the one called for images.
        logger.debug("_fetchUrlRaw")
        raise NotImplementedError
def _fetchUrl(self,url,parameters=None,extrasleep=1.0,usecache=True):
## ffnet(and, I assume, fpcom) tends to fail more if hit too
## fast. This is in additional to what ever the
## slow_down_sleep_time setting is.
return BaseSiteAdapter._fetchUrl(self,url,
parameters=parameters,
extrasleep=extrasleep,
usecache=usecache)
if self.chromagnon_cache is None:
logger.debug("Start making self.chromagnon_cache")
self.chromagnon_cache = ChromeCache(self.getConfig("chrome_cache_path"))
logger.debug("Done making self.chromagnon_cache")
data = self.chromagnon_cache.get_cached_file(url)
logger.debug("%s:len(%s)"%(url,len(data)))
if data is None:
raise HTTPError(404,"Not found in Chrome Cache")
return self.configuration._decode(data)
def use_pagecache(self):
'''
@ -103,8 +141,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# use BeautifulSoup HTML parser to make everything easier to find.
try:
data = self._fetchUrl(url)
#logger.debug("\n===================\n%s\n===================\n"%data)
# logger.debug("\n===================\n%s\n===================\n"%data)
soup = self.make_soup(data)
# logger.debug("\n===================\n%s\n===================\n"%soup)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(url)

View file

@ -0,0 +1,24 @@
Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Chromagon Project nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,89 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Python implementation of SuperFastHash algorithm
Maybe it is better to use c_uint32 to limit the size of variables to 32bits
instead of using 0xFFFFFFFF mask.
"""
from __future__ import absolute_import
from __future__ import print_function
import binascii
import sys
def get16bits(data):
    """Return the first two bytes of *data* as a little-endian 16 bit int."""
    return int(binascii.hexlify(data[1::-1]), 16)


def superFastHash(data):
    """
    Return Paul Hsieh's SuperFastHash of the byte string *data* as a
    32 bit unsigned integer (the hash Chrome uses for its cache index).

    *data* must be a byte string.  Single-byte accesses below use slices
    (data[i:i+1]) rather than indexing: on Python 3 indexing a bytes
    object yields an int, and binascii.hexlify(int) raised TypeError for
    any input whose length was not a multiple of 4.
    """
    hash = length = len(data)
    if length == 0:
        return 0

    rem = length & 3          # 0-3 trailing bytes
    length >>= 2              # number of whole 32 bit words

    # Main loop: consume four bytes per round.
    while length > 0:
        hash += get16bits(data) & 0xFFFFFFFF
        tmp = (get16bits(data[2:]) << 11) ^ hash
        hash = ((hash << 16) & 0xFFFFFFFF) ^ tmp
        data = data[4:]
        hash += hash >> 11
        hash = hash & 0xFFFFFFFF
        length -= 1

    # Tail: mix in the 1-3 remaining bytes.
    if rem == 3:
        hash += get16bits(data)
        hash ^= (hash << 16) & 0xFFFFFFFF
        # data[2:3] (not data[2]): keep a bytes object for hexlify.
        hash ^= (int(binascii.hexlify(data[2:3]), 16) << 18) & 0xFFFFFFFF
        hash += hash >> 11
    elif rem == 2:
        hash += get16bits(data)
        hash ^= (hash << 11) & 0xFFFFFFFF
        hash += hash >> 17
    elif rem == 1:
        hash += int(binascii.hexlify(data[0:1]), 16)
        hash ^= (hash << 10) & 0xFFFFFFFF
        hash += hash >> 1

    # Final avalanche of the remaining bits.
    hash = hash & 0xFFFFFFFF
    hash ^= (hash << 3) & 0xFFFFFFFF
    hash += hash >> 5
    hash = hash & 0xFFFFFFFF
    hash ^= (hash << 4) & 0xFFFFFFFF
    hash += hash >> 17
    hash = hash & 0xFFFFFFFF
    hash ^= (hash << 25) & 0xFFFFFFFF
    hash += hash >> 6
    hash = hash & 0xFFFFFFFF
    return hash
if __name__ == "__main__":
    # The hash operates on byte strings; encode the command-line argument
    # so this also works on Python 3, where sys.argv holds str.
    print("%08x" % superFastHash(sys.argv[1].encode('utf-8')))

View file

View file

@ -0,0 +1,92 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Chrome Cache Address
See /net/disk_cache/addr.h for design details
"""
class CacheAddressError(Exception):
    """Raised for invalid (null or uninitialized) cache addresses."""

    def __init__(self, value):
        # Keep the offending value around for display by __str__.
        self.value = value

    def __str__(self):
        return "%r" % (self.value,)
class CacheAddress():
    """
    Object representing a Chrome Cache Address.

    The 32 bit address encodes where an entry lives: either a dedicated
    external file ("f_######") or a slot inside one of the block files
    ("data_#").  See net/disk_cache/addr.h in Chromium for the layout.
    """
    SEPARATE_FILE = 0
    RANKING_BLOCK = 1
    BLOCK_256 = 2
    BLOCK_1024 = 3
    BLOCK_4096 = 4

    # (label, entry size in bytes), indexed by block type.
    typeArray = [("Separate file", 0),
                 ("Ranking block file", 36),
                 ("256 bytes block file", 256),
                 ("1k bytes block file", 1024),
                 ("4k bytes block file", 4096)]

    def __init__(self, uint_32, path):
        """
        Decode the 32 bit address *uint_32*; *path* is the cache directory.

        Raises CacheAddressError for a null or uninitialized address.
        """
        if uint_32 == 0:
            raise CacheAddressError("Null Address")
        #XXX Is self.binary useful ??
        self.addr = uint_32
        self.path = path
        self.binary = bin(uint_32)
        # The most significant bit flags an initialized address.
        if not (uint_32 & 0x80000000):
            raise CacheAddressError("Uninitialized Address")
        # Bits 30-28 carry the block type.
        self.blockType = (uint_32 >> 28) & 0x7
        if self.blockType == CacheAddress.SEPARATE_FILE:
            # Bits 27-0: number of the external "f_" file.
            self.fileSelector = "f_%06x" % (uint_32 & 0x0FFFFFFF)
        elif self.blockType == CacheAddress.RANKING_BLOCK:
            # Bits 23-16: number of the "data_" block file.
            self.fileSelector = "data_" + str((uint_32 >> 16) & 0xFF)
        else:
            self.entrySize = CacheAddress.typeArray[self.blockType][1]
            # Bits 25-24: how many contiguous blocks the entry spans.
            self.contiguousBlock = (uint_32 >> 24) & 0x3
            self.fileSelector = "data_" + str((uint_32 >> 16) & 0xFF)
            # Bits 15-0: index of the first block inside the file.
            self.blockNumber = uint_32 & 0xFFFF

    def __str__(self):
        """Human readable form: address, block count and backing file."""
        parts = [hex(self.addr), " ("]
        if self.blockType >= CacheAddress.BLOCK_256:
            parts.append(str(self.contiguousBlock))
            parts.append(" contiguous blocks in ")
        parts.append(CacheAddress.typeArray[self.blockType][0])
        parts.append(" : " + self.fileSelector + ")")
        return "".join(parts)

View file

@ -0,0 +1,64 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import absolute_import
import unittest
from . import cacheAddress
class CacheAddressTest(unittest.TestCase):
    """Unit tests for cacheAddress.CacheAddress.

    CacheAddress requires the cache directory path as a second argument,
    so every construction below passes a dummy path (the attributes under
    test do not touch the filesystem).
    """

    def testFileType(self):
        """Parse Block Type From Address"""
        address = cacheAddress.CacheAddress(0x8000002A, "dummy")
        self.assertEqual(address.blockType,
                         cacheAddress.CacheAddress.SEPARATE_FILE)
        address = cacheAddress.CacheAddress(0x9DFF0000, "dummy")
        self.assertEqual(address.blockType,
                         cacheAddress.CacheAddress.RANKING_BLOCK)
        address = cacheAddress.CacheAddress(0xA0010003, "dummy")
        self.assertEqual(address.blockType,
                         cacheAddress.CacheAddress.BLOCK_256)
        address = cacheAddress.CacheAddress(0xBDFF0108, "dummy")
        self.assertEqual(address.blockType,
                         cacheAddress.CacheAddress.BLOCK_1024)
        address = cacheAddress.CacheAddress(0xCDFF0108, "dummy")
        self.assertEqual(address.blockType,
                         cacheAddress.CacheAddress.BLOCK_4096)

    def testFilename(self):
        """Parse Filename from Address"""
        # "f_%06x" produces lower-case hex padded to six digits, so the
        # expected selector is "f_00002a" (not "f_0002A").
        address = cacheAddress.CacheAddress(0x8000002A, "dummy")
        self.assertEqual(address.fileSelector,
                         "f_00002a")
        address = cacheAddress.CacheAddress(0xA001135C, "dummy")
        self.assertEqual(address.fileSelector,
                         "data_1")
# Allow running this test module directly.
if __name__ == "__main__":
    unittest.main()

View file

@ -0,0 +1,86 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the header of a Chrome Cache File
See http://www.chromium.org/developers/design-documents/network-stack/disk-cache
for design details
"""
from __future__ import absolute_import
import struct
from six.moves import range
class CacheBlock():
    """
    A block of the Chrome cache: either the index file or one of the
    block files (256B, 1024B, 4096B, Ranking Block).

    Only the header is parsed; see /net/disk_cache/disk_format.h for the
    on-disk layout.
    """
    INDEX_MAGIC = 0xC103CAC3
    BLOCK_MAGIC = 0xC104CAC3
    INDEX = 0
    BLOCK = 1

    def __init__(self, filename):
        """
        Parse the header of the cache file *filename*.

        Raises Exception when the magic number matches neither an index
        nor a block file.
        """
        with open(filename, 'rb') as header:
            # The first four bytes identify the file kind.
            magic = struct.unpack('I', header.read(4))[0]
            if magic == CacheBlock.BLOCK_MAGIC:
                self.type = CacheBlock.BLOCK
                header.seek(2, 1)  # skip the minor version
                self.version = struct.unpack('h', header.read(2))[0]
                self.header = struct.unpack('h', header.read(2))[0]
                self.nextFile = struct.unpack('h', header.read(2))[0]
                self.blockSize = struct.unpack('I', header.read(4))[0]
                self.entryCount = struct.unpack('I', header.read(4))[0]
                self.entryMax = struct.unpack('I', header.read(4))[0]
                self.empty = [struct.unpack('I', header.read(4))[0]
                              for _ in range(4)]
                self.position = [struct.unpack('I', header.read(4))[0]
                                 for _ in range(4)]
            elif magic == CacheBlock.INDEX_MAGIC:
                self.type = CacheBlock.INDEX
                header.seek(2, 1)  # skip the minor version
                self.version = struct.unpack('h', header.read(2))[0]
                self.entryCount = struct.unpack('I', header.read(4))[0]
                self.byteCount = struct.unpack('I', header.read(4))[0]
                self.lastFileCreated = "f_%06x" % \
                    struct.unpack('I', header.read(4))[0]
                header.seek(4*2, 1)  # skip this_id and stats
                self.tableSize = struct.unpack('I', header.read(4))[0]
            else:
                # The 'with' block closes the file even on this path.
                raise Exception("Invalid Chrome Cache File")

View file

@ -0,0 +1,130 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Stores the data fetched in the cache.
Parse the HTTP header if asked.
"""
from __future__ import absolute_import
import re
import shutil
import struct
import os
from . import cacheAddress
from six.moves import range
class CacheData():
    """
    Data pointed to by a cache address.

    Lazy object: the backing file is opened only when content is needed.
    If *isHTTPHeader* is true, the constructor tries to parse the payload
    as an HTTP header block (see net/http/http_util.cc,
    LocateStartOfStatusLine / LocateEndOfHeaders in Chromium).
    """
    # Values for self.type
    HTTP_HEADER = 0
    UNKNOWN = 1

    def __init__(self, address, size, isHTTPHeader=False):
        """
        address: CacheAddress locating the payload.
        size: payload size in bytes.
        isHTTPHeader: attempt HTTP header parsing when True (only for
                      block-file addresses, not separate files).
        """
        self.size = size
        self.address = address
        self.type = CacheData.UNKNOWN

        if isHTTPHeader and\
           self.address.blockType != cacheAddress.CacheAddress.SEPARATE_FILE:
            # Read the raw payload in one shot.  (The original read it one
            # byte at a time through struct.unpack('c'), which was O(n)
            # reads and raised struct.error on a truncated file.)
            with open(os.path.join(self.address.path,
                                   self.address.fileSelector), 'rb') as block:
                # Block files have an 8192 byte header before the slots.
                block.seek(8192 + self.address.blockNumber*self.address.entrySize)
                string = block.read(self.size)
            # Finding the beginning of the request
            start = re.search(b"HTTP", string)
            if start is None:
                return
            string = string[start.start():]
            # Finding the end (some null characters : verified by experience)
            end = re.search(b"\x00\x00", string)
            if end is None:
                return
            string = string[:end.end()-2]
            # Creating the dictionary of headers; keys are lower-cased
            # *bytes*, values are bytes.
            self.headers = {}
            for line in string.split(b'\0'):
                stripped = line.split(b':')
                self.headers[stripped[0].lower()] = \
                    b':'.join(stripped[1:]).strip()
            self.type = CacheData.HTTP_HEADER

    def save(self, filename=None):
        """Save the data to the specified filename."""
        if self.address.blockType == cacheAddress.CacheAddress.SEPARATE_FILE:
            shutil.copy(os.path.join(self.address.path,
                                     self.address.fileSelector),
                        filename)
        else:
            with open(os.path.join(self.address.path,
                                   self.address.fileSelector), 'rb') as block:
                block.seek(8192 + self.address.blockNumber*self.address.entrySize)
                payload = block.read(self.size)
            with open(filename, 'wb') as output:
                output.write(payload)

    def data(self):
        """Return the payload.

        NOTE(review): returns raw bytes for SEPARATE_FILE entries but a
        utf-8 decoded str for block-file entries.  Callers doing gzip or
        brotli decompression expect bytes -- confirm before relying on the
        block-file branch for binary payloads.
        """
        if self.address.blockType == cacheAddress.CacheAddress.SEPARATE_FILE:
            with open(os.path.join(self.address.path,
                                   self.address.fileSelector), 'rb') as infile:
                data = infile.read()
        else:
            with open(os.path.join(self.address.path,
                                   self.address.fileSelector), 'rb') as block:
                block.seek(8192 + self.address.blockNumber*self.address.entrySize)
                data = block.read(self.size).decode('utf-8')
        return data

    def __str__(self):
        """
        Display the type of cacheData
        """
        if self.type == CacheData.HTTP_HEADER:
            # Header keys are bytes (see __init__); the previous str key
            # 'content-type' could never match.
            if b'content-type' in self.headers:
                return "HTTP Header %s" % self.headers[b'content-type']
            else:
                return "HTTP Header"
        else:
            return "Data"

View file

@ -0,0 +1,140 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Chrome Cache Entry
See http://www.chromium.org/developers/design-documents/network-stack/disk-cache
for design details
"""
from __future__ import absolute_import
import datetime
import struct
import os
from . import cacheAddress
from . import cacheData
from six.moves import range
class CacheEntry():
    """
    One entry of the Chrome disk cache: a key (url) plus up to four data
    streams.  See /net/disk_cache/disk_format.h for details.
    """
    # Human readable labels indexed by the on-disk 'state' field.
    STATE = ["Normal",
             "Evicted (data were deleted)",
             "Doomed (shit happened)"]

    def __init__(self, address):
        """
        Parse a Chrome Cache Entry at the given address

        address: CacheAddress locating the entry inside a block file.
        """
        self.httpHeader = None
        block = open(os.path.join(address.path,address.fileSelector), 'rb')

        # Going to the right entry
        # (block files have an 8192 byte header followed by fixed-size slots)
        block.seek(8192 + address.blockNumber*address.entrySize)

        # Parsing basic fields
        self.hash = struct.unpack('I', block.read(4))[0]
        self.next = struct.unpack('I', block.read(4))[0]
        self.rankingNode = struct.unpack('I', block.read(4))[0]
        self.usageCounter = struct.unpack('I', block.read(4))[0]
        self.reuseCounter = struct.unpack('I', block.read(4))[0]
        self.state = struct.unpack('I', block.read(4))[0]
        # Chrome stores times as microseconds since the Windows epoch
        # (1601-01-01).
        self.creationTime = datetime.datetime(1601, 1, 1) + \
                            datetime.timedelta(microseconds=\
                                struct.unpack('Q', block.read(8))[0])
        self.keyLength = struct.unpack('I', block.read(4))[0]
        self.keyAddress = struct.unpack('I', block.read(4))[0]

        # Four data stream sizes, then four data stream addresses.
        dataSize = []
        for _ in range(4):
            dataSize.append(struct.unpack('I', block.read(4))[0])

        self.data = []
        for index in range(4):
            addr = struct.unpack('I', block.read(4))[0]
            try:
                addr = cacheAddress.CacheAddress(addr, address.path)
                self.data.append(cacheData.CacheData(addr, dataSize[index],
                                                     True))
            except cacheAddress.CacheAddressError:
                # Null/uninitialized stream address: stream not present.
                pass

        # Find the HTTP header if there is one
        for data in self.data:
            if data.type == cacheData.CacheData.HTTP_HEADER:
                self.httpHeader = data
                break

        self.flags = struct.unpack('I', block.read(4))[0]

        # Skipping pad
        block.seek(5*4, 1)

        # Reading local key
        if self.keyAddress == 0:
            self.key = block.read(self.keyLength).decode('ascii')
        # Key stored elsewhere
        else:
            addr = cacheAddress.CacheAddress(self.keyAddress, address.path)
            # It is probably an HTTP header
            self.key = cacheData.CacheData(addr, self.keyLength, True)
        block.close()

    def keyToStr(self):
        """
        Since the key can be a string or a CacheData object, this function is an
        utility to display the content of the key whatever type is it.
        """
        if self.keyAddress == 0:
            return self.key
        else:
            return self.key.data()

    def __str__(self):
        """Multi-line human readable dump of the entry."""
        string = "Hash: 0x%08x" % self.hash + '\n'
        if self.next != 0:
            string += "Next: 0x%08x" % self.next + '\n'
        # NOTE: the '\n' "Reuse..." pairs below are adjacent string
        # literals: they concatenate at parse time, before % is applied.
        string += "Usage Counter: %d" % self.usageCounter + '\n'\
                  "Reuse Counter: %d" % self.reuseCounter + '\n'\
                  "Creation Time: %s" % self.creationTime + '\n'
        if self.keyAddress != 0:
            string += "Key Address: 0x%08x" % self.keyAddress + '\n'
        string += "Key: %s" % self.key + '\n'
        if self.flags != 0:
            string += "Flags: 0x%08x" % self.flags + '\n'
        string += "State: %s" % CacheEntry.STATE[self.state]
        for data in self.data:
            string += "\nData (%d bytes) at 0x%08x : %s" % (data.size,
                                                            data.address.addr,
                                                            data)
        return string

View file

@ -0,0 +1,293 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome Cache File
See http://www.chromium.org/developers/design-documents/network-stack/disk-cache
for design details
"""
from __future__ import absolute_import
from __future__ import print_function
import gzip
import os
import struct
import sys
import re
import brotli
from . import csvOutput
from . import SuperFastHash
from .cacheAddress import CacheAddress
from .cacheBlock import CacheBlock
from .cacheData import CacheData
from .cacheEntry import CacheEntry
from six.moves import range
def parse(path, urls=None):
    """
    Read the whole cache, or only the given urls, and return the entries.

    path: directory of the Chrome cache (must contain the "index" file).
    urls: optional iterable of url strings to look up; when None the whole
          hash table is walked.

    Returns a list of CacheEntry objects.
    Raises Exception when the index file is not a valid Chrome index.
    """
    path = os.path.abspath(path)
    cacheBlock = CacheBlock(os.path.join(path, "index"))

    # Checking type
    if cacheBlock.type != CacheBlock.INDEX:
        raise Exception("Invalid Index File")

    cache = []
    # 'with' guarantees the index file is closed even if entry parsing
    # raises part way through (the original leaked the handle then).
    with open(os.path.join(path, "index"), 'rb') as index:
        # Skipping the header: the hash table starts at offset 92*4.
        index.seek(92*4)

        if urls is None:
            # No url specified: parse the whole cache.
            for key in range(cacheBlock.tableSize):
                raw = struct.unpack('I', index.read(4))[0]
                if raw != 0:
                    entry = CacheEntry(CacheAddress(raw, path=path))
                    # Checking if there is a next item in the bucket because
                    # such entries are not stored in the Index File so they will
                    # be ignored during iterative lookup in the hash table
                    while entry.next != 0:
                        cache.append(entry)
                        entry = CacheEntry(CacheAddress(entry.next, path=path))
                    cache.append(entry)
        else:
            # Find the entry for each url
            for url in urls:
                url = bytes(url, 'utf8')
                # Compute the key and seek to its bucket.
                hash = SuperFastHash.superFastHash(url)
                key = hash & (cacheBlock.tableSize - 1)
                index.seek(92*4 + key*4)

                addr = struct.unpack('I', index.read(4))[0]
                # Checking if the address is initialized (i.e. used)
                if addr & 0x80000000 == 0:
                    print("%s is not in the cache" % url, file=sys.stderr)
                else:
                    # Follow the chained list in the bucket
                    entry = CacheEntry(CacheAddress(addr, path=path))
                    while entry.hash != hash and entry.next != 0:
                        entry = CacheEntry(CacheAddress(entry.next, path=path))
                    if entry.hash == hash:
                        cache.append(entry)
    return cache
class ChromeCache(object):
    """
    Lookup table over a parsed Chrome disk cache, keyed by url.
    """
    def __init__(self, path):
        """
        path: the Chrome cache directory (containing the 'index' file).
        """
        self.cache = parse(path)
        self.hash_cache = {}
        for entry in self.cache:
            key = entry.keyToStr()
            self.hash_cache[key] = entry
            # Also index ffnet story urls normalized down to
            # .../s/<storyid>/<chapter>/ so lookups without the title slug
            # hit too.  (Dots are now escaped; the original pattern used
            # bare '.' which matched any character.)
            normkey = re.sub(r'^(https://www\.fanfiction\.net/s/\d+/\d+/).+$',
                             r'\1', key)
            ## either overwrites (no harm), or adds new.
            self.hash_cache[normkey] = entry

    def get_cached_file(self, url):
        """
        Return the (decompressed) cached body for *url*, or None when the
        url is not cached or its entry has no payload stream.
        """
        if url not in self.hash_cache:
            return None
        entry = self.hash_cache[url]
        for stream in entry.data:
            # UNKNOWN marks the payload stream; HTTP_HEADER streams are
            # response metadata.
            if stream.type != CacheData.UNKNOWN:
                continue
            data = stream.data()
            if entry.httpHeader is not None and \
               b'content-encoding' in entry.httpHeader.headers:
                encoding = entry.httpHeader.headers[b'content-encoding']
                if encoding == b"gzip":
                    data = gzip.decompress(data)
                elif encoding == b"br":
                    data = brotli.decompress(data)
            return data
        return None
def exportToHTML(cache, outpath):
    """
    Export the cache in html.

    Writes an index.html linking one page per entry into *outpath*, and
    saves (decompressing where possible) each entry's payload alongside.
    """
    # Checking that the directory exists and is writable
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    outpath = os.path.abspath(outpath)

    index = open(os.path.join(outpath,"index.html"), 'w')
    index.write("<UL>")

    for entry in cache:
        # Adding a link in the index
        if entry.keyLength > 100:
            entry_name = entry.keyToStr()[:100] + "..."
        else:
            entry_name = entry.keyToStr()
        index.write('<LI><a href="%08x.html">%s</a></LI>'%(entry.hash, entry_name))

        # We handle the special case where entry_name ends with a slash
        page_basename = entry_name.split('/')[-2] if entry_name.endswith('/') else entry_name.split('/')[-1]

        # Creating the entry page
        page = open(os.path.join(outpath,"%08x.html"%entry.hash), 'w')
        page.write("""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
</head>
<body>""")

        # Details of the entry
        page.write("<b>Hash</b>: 0x%08x<br />"%entry.hash)
        page.write("<b>Usage Counter</b>: %d<br />"%entry.usageCounter)
        page.write("<b>Reuse Counter</b>: %d<br />"%entry.reuseCounter)
        page.write("<b>Creation Time</b>: %s<br />"%entry.creationTime)
        page.write("<b>Key</b>: %s<br>"%entry.keyToStr())
        page.write("<b>State</b>: %s<br>"%CacheEntry.STATE[entry.state])
        page.write("<hr>")

        ## entry.data normally 2 or 1
        ## 2 for headers and data, 1 for headers only.
        if len(entry.data) == 0:
            page.write("No data associated with this entry :-(")
        for i in range(len(entry.data)):
            if entry.data[i].type == CacheData.UNKNOWN:
                # Extracting data into a file
                name = hex(entry.hash) + "_" + str(i)
                entry.data[i].save(os.path.join(outpath,name))

                if entry.httpHeader is not None and \
                   b'content-encoding' in entry.httpHeader.headers:
                    if entry.httpHeader.headers[b'content-encoding'] == b"gzip":
                        # XXX Highly inefficient !!!!!
                        try:
                            # 'infile' rather than 'input' -- avoid
                            # shadowing the builtin.
                            infile = gzip.open(os.path.join(outpath, name), 'rb')
                            output = open(os.path.join(outpath, name + "u"), 'wb')
                            output.write(infile.read())
                            infile.close()
                            output.close()
                            page.write('<a href="%su">%s</a>'%(name, page_basename))
                        except IOError:
                            page.write("Something wrong happened while unzipping")
                    elif entry.httpHeader.headers[b'content-encoding'] == b"br":
                        try:
                            with open(os.path.join(outpath,name), 'rb') as infile:
                                with open(os.path.join(outpath,name + "u"), 'wb') as output:
                                    output.write(brotli.decompress(infile.read()))
                            page.write('<a href="%su">%s</a>'%(name, page_basename))
                        except IOError:
                            page.write("Something wrong happened while unzipping")
                        # (a stray no-op 'brotli' expression statement that
                        # sat here has been removed)
                else:
                    page.write('<a href="%s">%s</a>'%(name ,
                               entry.keyToStr().split('/')[-1]))

                # If it is a picture, display it
                if entry.httpHeader is not None:
                    if b'content-type' in entry.httpHeader.headers and\
                       b"image" in entry.httpHeader.headers[b'content-type']:
                        page.write('<br /><img src="%s">'%(name))
            # HTTP Header
            else:
                page.write("<u>HTTP Header</u><br />")
                for key, value in entry.data[i].headers.items():
                    page.write("<b>%s</b>: %s<br />"%(key, value))
            page.write("<hr>")
        page.write("</body></html>")
        page.close()

    index.write("</UL>")
    index.close()
def exportTol2t(cache):
    """
    Export the cache in CSV log2timeline compliant format.

    cache: iterable of cache entries; each entry is expected to provide
           creationTime (datetime), keyToStr(), hash, usageCounter and
           httpHeader (possibly None) attributes.
    Writes the CSV to stdout via csvOutput.csvOutput; returns None.
    """
    output = []
    # log2timeline CSV header row
    output.append(["date",
                   "time",
                   "timezone",
                   "MACB",
                   "source",
                   "sourcetype",
                   "type",
                   "user",
                   "host",
                   "short",
                   "desc",
                   "version",
                   "filename",
                   "inode",
                   "notes",
                   "format",
                   "extra"])
    for entry in cache:
        date = entry.creationTime.date().strftime("%m/%d/%Y")
        time = entry.creationTime.time()
        # TODO get timezone
        timezone = 0
        short = entry.keyToStr()
        descr = "Hash: 0x%08x" % entry.hash
        descr += " Usage Counter: %d" % entry.usageCounter
        if entry.httpHeader is not None:
            # Header keys are bytes elsewhere in this module (e.g.
            # b'content-encoding'), so a str key would never match here.
            if b'content-type' in entry.httpHeader.headers:
                mime = entry.httpHeader.headers[b'content-type']
                if isinstance(mime, bytes):
                    # avoid "b'text/html'" style output on Python 3
                    mime = mime.decode('ascii', 'replace')
                descr += " MIME: %s" % mime
        output.append([date,
                       time,
                       timezone,
                       "MACB",
                       "WEBCACHE",
                       "Chrome Cache",
                       "Cache Entry",
                       "-",
                       "-",
                       short,
                       descr,
                       "2",
                       "-",
                       "-",
                       "-",
                       "-",
                       "-",
                       ])
    csvOutput.csvOutput(output)

View file

@ -0,0 +1,45 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Classical Output Module
"""
from __future__ import absolute_import
import sys
def classicalOutput(queryResult, separator="\t"):
    """
    Display the data separated by the specified separator.

    queryResult: iterable of rows; each row is an iterable of cells.
                 Cells may be non-string values (callers in this project
                 pass ints and datetimes), so each cell is coerced with
                 str() before writing -- sys.stdout.write() would raise
                 TypeError on non-string input otherwise.
    separator: string written after every cell (including the last one
               of each row, matching the historical output format).
    """
    for line in queryResult:
        for element in line:
            sys.stdout.write(str(element))
            sys.stdout.write(separator)
        sys.stdout.write('\n')

View file

@ -0,0 +1,49 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Column Output Module
"""
from __future__ import print_function
from six.moves import range
def columnOutput(queryResult, separator=' '):
    """
    Display the data in aligned, left-justified columns.

    queryResult: list of rows; every row must have as many cells as the
                 first row. Cells are rendered with str().
    separator: string placed between columns (not after the last one).
    """
    if not queryResult:
        return
    # Width of each column: the widest rendered cell in that column.
    widths = [max(len(str(row[col])) for row in queryResult)
              for col in range(len(queryResult[0]))]
    # One "%-<width>s" specifier per column, joined by the separator.
    fmt = separator.join("%%-%ds" % width for width in widths)
    for row in queryResult:
        print(fmt % tuple(row))

View file

@ -0,0 +1,44 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
CSV Output Module
"""
from __future__ import absolute_import
import csv
import sys
def csvOutput(queryResult, separator=',', quote='"'):
    """
    Write the rows of queryResult to stdout in CSV format.

    separator: field delimiter; quote: quoting character. Fields are
    quoted only when needed (csv.QUOTE_MINIMAL).
    """
    writer = csv.writer(sys.stdout,
                        delimiter=separator,
                        quotechar=quote,
                        quoting=csv.QUOTE_MINIMAL)
    writer.writerows(queryResult)

View file

@ -0,0 +1,106 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome Download Table History File
Its a SQLite3 table
"""
from __future__ import absolute_import
from __future__ import print_function
import datetime
import sqlite3
import sys
import six
def parse(filename, urlLength):
    """
    Parse the Chrome downloads table and return a list of DownloadEntry.

    filename: path to the history file (SQLite database)
    urlLength: maximum url length to display (0 or less: no truncation)
    """
    # Connecting to the DB
    try:
        history = sqlite3.connect(filename)
    except sqlite3.Error as error:
        print("==> Error while opening the history file !")
        # Python 3 exceptions have no ``.message`` attribute; printing
        # the exception itself works on both Python 2 and 3.
        print("==> Details :", error)
        sys.exit("==> Exiting...")
    # Retrieving all useful data
    result = history.execute("SELECT id, \
                              full_path, \
                              url, \
                              start_time, \
                              received_bytes, \
                              total_bytes, \
                              state \
                              FROM downloads;")
    return [DownloadEntry(line, urlLength) for line in result]
class DownloadEntry(object):
    """Object to store download entries"""

    # Short column codes -> attribute names (used by columnToStr).
    COLUMN_STR = {
        'st': "startTime",
        'p': "path",
        'u': "url",
        'rb': "receivedBytes",
        'tb': "totalBytes",
        'pt': "percentReceived",
        's': "state",
    }

    # Download states as stored in the ``state`` column.
    STATE_STR = [
        "In Progress",
        "Complete",
        "Cancelled",
        "Removing",
        "Interrupted",
    ]

    def __init__(self, item, urlLength):
        """Parse one raw row from the ``downloads`` table."""
        self.path = item[1]
        raw_url = item[2]
        # Truncate overly long urls with an ellipsis when requested.
        if urlLength > 0 and len(raw_url) > urlLength:
            self.url = raw_url[0:urlLength - 3] + "..."
        else:
            self.url = raw_url
        # Chrome stores timestamps as microseconds since 1601-01-01.
        self.startTime = (datetime.datetime(1601, 1, 1)
                          + datetime.timedelta(microseconds=item[3]))
        self.receivedBytes = item[4]
        self.totalBytes = item[5]
        self.state = DownloadEntry.STATE_STR[item[6]]
        if int(item[5]) == 0:
            self.percentReceived = "0%"
        else:
            self.percentReceived = "%d%%" % int(
                float(item[4]) / float(item[5]) * 100)

    def columnToStr(self, column):
        """Returns column content specified by argument"""
        return six.text_type(getattr(self, DownloadEntry.COLUMN_STR[column]))

View file

@ -0,0 +1,178 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome History File
Its a SQLite3 file
"""
from __future__ import absolute_import
from __future__ import print_function
import datetime
import re
import sqlite3
import sys
from . import cacheParse
import six
def parse(filename, start, end, checkCache, cachePath, urlLength):
    """
    Parse the Chrome history database and return a list of HistoryEntry.

    filename: path to the history file (SQLite database)
    start: beginning of the time window (datetime)
    end: end of the time window (datetime)
    checkCache: check if each page in the history is in the cache
    cachePath: path to cache directory
    urlLength: maximum url length to display (0 or less: no truncation)
    """
    # Connecting to the DB
    try:
        history = sqlite3.connect(filename)
    except sqlite3.Error as error:
        print("==> Error while opening the history file !")
        # Python 3 exceptions have no ``.message`` attribute; printing
        # the exception itself works on both Python 2 and 3.
        print("==> Details :", error)
        sys.exit("==> Exiting...")

    # Chrome stores timestamps as microseconds since 1601-01-01.
    reference = datetime.datetime(1601, 1, 1)

    # Retrieving all useful data inside the requested time window.
    # The bounds are passed as SQL parameters rather than interpolated
    # into the statement text.
    result = history.execute("SELECT visits.visit_time, \
                              visits.from_visit, \
                              visits.transition, \
                              urls.url, \
                              urls.title, \
                              urls.visit_count, \
                              urls.typed_count, \
                              urls.last_visit_time \
                              FROM urls,visits \
                              WHERE urls.id=visits.url \
                              AND visits.visit_time>? \
                              AND visits.visit_time<? \
                              ORDER BY visits.visit_time;",
                             (int((start - reference).total_seconds() * 1000000),
                              int((end - reference).total_seconds() * 1000000)))
    # Parsing cache
    cache = None
    if checkCache:
        cache = cacheParse.parse(cachePath)
    return [HistoryEntry(line, cache, urlLength) for line in result]
class Transition():
    """Object representing transition between history pages"""

    # Core transition types, indexed by the low byte of the transition
    # value (content/common/page_transition_types.h).
    CORE_STRING = ["Link",
                   "Typed",
                   "Auto Bookmark",
                   "Auto Subframe",
                   "Manual Subframe",
                   "Generated",
                   "Start Page",
                   "Form Submit",
                   "Reload",
                   "Keyword",
                   "Keyword Generated"]

    # Qualifier flags stored in the high bytes of the transition value.
    QUALIFIER_STRING = [(0x01000000, "Forward or Back Button"),
                        (0x02000000, "Address Bar"),
                        (0x04000000, "Home Page"),
                        (0x10000000, "Beginning of Chain"),
                        (0x20000000, "End of Chain"),
                        (0x40000000, "Client Redirection"),
                        (0x80000000, "Server Redirection")]

    def __init__(self, transition):
        """
        Parsing the transition according to
        content/common/page_transition_types.h
        """
        self.core = transition & 0xFF             # low byte: core type
        self.qualifier = transition & 0xFFFFFF00  # high bytes: qualifier flags

    def __str__(self):
        string = Transition.CORE_STRING[self.core]
        for mask, description in Transition.QUALIFIER_STRING:
            if self.qualifier & mask != 0:
                string += ", %s" % description
        return string
class HistoryEntry(object):
    """Object to store database entries"""

    # Short column codes -> attribute names (used by columnToStr).
    COLUMN_STR = {'vt': "visitTime",
                  'fv': "fromVisit",
                  'tr': "transition",
                  'u': "url",
                  'tl': "title",
                  'vc': "visitCount",
                  'tc': "typedCount",
                  'lv': "lastVisitTime",
                  'cc': "inCache"}

    def __init__(self, item, cache, urlLength):
        """Parse one raw row of the urls/visits join query."""
        # Chrome stores timestamps as microseconds since 1601-01-01.
        self.visitTime = datetime.datetime(1601, 1, 1) + \
                         datetime.timedelta(microseconds=item[0])
        self.fromVisit = item[1]
        self.transition = Transition(item[2])
        if len(item[3]) > urlLength and urlLength > 0:
            self.url = item[3][0:urlLength - 3] + "..."
        else:
            self.url = item[3]
        self.title = item[4]
        self.visitCount = item[5]
        self.typedCount = item[6]
        self.lastVisitTime = datetime.datetime(1601, 1, 1) + \
                             datetime.timedelta(microseconds=item[7])
        # Searching in the cache if there is a copy of the page.
        # TODO use a hash table to search instead of heavy exhaustive search
        # NOTE(review): the lookup compares against the possibly-truncated
        # url, so truncated urls may never match a cache key -- confirm.
        self.inCache = False
        if cache is not None:
            # Loop variable renamed: reusing ``item`` here would shadow
            # the constructor argument of the same name.
            for cached in cache:
                if cached.keyToStr() == self.url:
                    self.inCache = True
                    break

    def toStr(self):
        """Return the entry's columns (minus inCache) as text values."""
        return [six.text_type(self.visitTime),
                six.text_type(self.fromVisit),
                six.text_type(self.transition),
                six.text_type(self.url),
                six.text_type(self.title),
                six.text_type(self.visitCount),
                six.text_type(self.typedCount),
                six.text_type(self.lastVisitTime)]

    def columnToStr(self, column):
        """Returns column content specified by argument"""
        return six.text_type(self.__getattribute__(HistoryEntry.COLUMN_STR[column]))

View file

@ -0,0 +1,42 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
JSON Output Module
"""
from __future__ import absolute_import
from __future__ import print_function
import json
def jsonOutput(queryResult, separator=''):
    """
    Display the data serialized as JSON on stdout.

    separator is accepted only to match the other output modules'
    signatures; it is not used for JSON.
    """
    print(json.dumps(queryResult))

View file

@ -0,0 +1,97 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Jean-Rémy Bancel <jean-remy.bancel@telecom-paristech.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Chromagon Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Parse the Chrome Visited Links
Reverse engineered from
chrome/common/visitedlink_common.*
chrome/browser/visitedlink/visitedlink_*
"""
from __future__ import absolute_import
import md5
import struct
import sys
from six.moves import range
# First four bytes of a valid "Visited Links" file.
VISITED_LINKS_MAGIC = 0x6b6e4c56

def isVisited(path, urls):
    """
    Return the list of urls given in parameter with a boolean information
    about its presence in the given visited links file.

    path: path to a Chrome "Visited Links" file
    urls: iterable of urls (str or bytes) to look up
    Returns a list of (url, bool) tuples; raises Exception when the file
    magic number does not match.
    """
    # Local import: ``hashlib`` replaces the Python-2-only ``md5`` module.
    import hashlib

    output = []
    with open(path, 'rb') as f:
        # Checking file type
        magic = struct.unpack('I', f.read(4))[0]
        if magic != VISITED_LINKS_MAGIC:
            raise Exception("Invalid file")
        # Reading header values (the reads also advance the file offset
        # to the salt, even though version/usedItems are unused here).
        version = struct.unpack('I', f.read(4))[0]
        length = struct.unpack('I', f.read(4))[0]
        usedItems = struct.unpack('I', f.read(4))[0]
        # Reading salt: 8 raw bytes (kept as bytes for hashing).
        salt = f.read(8)
        for url in urls:
            fingerprint_hash = hashlib.md5()
            fingerprint_hash.update(salt)
            fingerprint_hash.update(
                url if isinstance(url, bytes) else url.encode('utf-8'))
            digest = fingerprint_hash.hexdigest()
            # Inverting the result
            # Why Chrome MD5 computation gives a reverse digest ?
            fingerprint = 0
            for i in range(0, 16, 2):
                # ``//`` keeps the shift amount an int under Python 3.
                fingerprint += int(digest[i:i + 2], 16) << (i // 2) * 8
            key = fingerprint % length
            # The hash table uses open addressing
            f.seek(key * 8 + 24, 0)
            while True:
                finger = struct.unpack('q', f.read(8))[0]
                if finger == 0:
                    output.append((url, False))
                    break
                if finger == fingerprint:
                    output.append((url, True))
                    break
                if f.tell() >= length * 8 + 24:
                    f.seek(24)
                if f.tell() == key * 8 + 24:
                    output.append((url, False))
                    break
    return output