From 9819e0b2148ef216858b97fcf04f06f46cdcee50 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 21 Jan 2021 17:57:28 -0600 Subject: [PATCH] Clean up chromagnon code to only what we're using--rest probably wasn't updated completely anyway. --- fanficfare/chromagnon/cacheParse.py | 260 +++-------------------- fanficfare/chromagnon/classicalOutput.py | 45 ---- fanficfare/chromagnon/columnOutput.py | 49 ----- fanficfare/chromagnon/csvOutput.py | 44 ---- fanficfare/chromagnon/downloadParse.py | 106 --------- fanficfare/chromagnon/historyParse.py | 178 ---------------- fanficfare/chromagnon/jsonOutput.py | 42 ---- fanficfare/chromagnon/visitedLinks.py | 97 --------- 8 files changed, 34 insertions(+), 787 deletions(-) delete mode 100644 fanficfare/chromagnon/classicalOutput.py delete mode 100644 fanficfare/chromagnon/columnOutput.py delete mode 100644 fanficfare/chromagnon/csvOutput.py delete mode 100644 fanficfare/chromagnon/downloadParse.py delete mode 100644 fanficfare/chromagnon/historyParse.py delete mode 100644 fanficfare/chromagnon/jsonOutput.py delete mode 100644 fanficfare/chromagnon/visitedLinks.py diff --git a/fanficfare/chromagnon/cacheParse.py b/fanficfare/chromagnon/cacheParse.py index 2857ccb5..0d35b495 100644 --- a/fanficfare/chromagnon/cacheParse.py +++ b/fanficfare/chromagnon/cacheParse.py @@ -41,33 +41,32 @@ import sys import re import time -def do_cprofile(func): - def profiled_func(*args, **kwargs): - t=0 - try: - t = time.time() - result = func(*args, **kwargs) - t = time.time() - t - return result - finally: - print("time:%s"%t) - return profiled_func +# def do_cprofile(func): +# def profiled_func(*args, **kwargs): +# t=0 +# try: +# t = time.time() +# result = func(*args, **kwargs) +# t = time.time() - t +# return result +# finally: +# print("time:%s"%t) +# return profiled_func try: from brotli import decompress - @do_cprofile + # @do_cprofile def brotli_decompress(inbuf): return decompress(inbuf) except: # Calibre doesn't include brotli, so use packaged brotlipython # which is waaaay slower, but pure python. from brotlipython import brotlidec - @do_cprofile + # @do_cprofile def brotli_decompress(inbuf): # wants the output, too, but returns it return brotlidec(inbuf,[]) -from . import csvOutput from . import SuperFastHash from .cacheAddress import CacheAddress @@ -76,47 +75,23 @@ from .cacheData import CacheData from .cacheEntry import CacheEntry from six.moves import range +class ChromeCache(object): + def __init__(self,path): + self.path = os.path.abspath(path) + self.cacheBlock = CacheBlock(os.path.join(path, "index")) -def parse(path, urls=None): - """ - Reads the whole cache and store the collected data in a table - or find out if the given list of urls is in the cache. If yes it - return a list of the corresponding entries. - """ - # Verifying that the path end with / (What happen on windows?) - path = os.path.abspath(path) - cacheBlock = CacheBlock(os.path.join(path, "index")) + # Checking type + if self.cacheBlock.type != CacheBlock.INDEX: + raise Exception("Invalid Index File") - # Checking type - if cacheBlock.type != CacheBlock.INDEX: - raise Exception("Invalid Index File") - - index = open(os.path.join(path, "index"), 'rb') - - # Skipping Header - index.seek(92*4) - - cache = [] - # If no url is specified, parse the whole cache - if urls == None: - for key in range(cacheBlock.tableSize): - raw = struct.unpack('I', index.read(4))[0] - if raw != 0: - entry = CacheEntry(CacheAddress(raw, path=path)) - # Checking if there is a next item in the bucket because - # such entries are not stored in the Index File so they will - # be ignored during iterative lookup in the hash table - while entry.next != 0: - cache.append(entry) - entry = CacheEntry(CacheAddress(entry.next, path=path)) - cache.append(entry) - else: - # Find the entry for each url - for url in urls: - url = bytes(url,'utf8') - # Compute the key and seeking to it - hash = SuperFastHash.superFastHash(url) - key = hash & (cacheBlock.tableSize - 1) + def get_cache_entry(self,url): + url = bytes(url,'utf8') + # Compute the key and seeking to it + # print("url:%s"%url) + hash = SuperFastHash.superFastHash(url) + # print("superFastHash:%s"%hash) + key = hash & (self.cacheBlock.tableSize - 1) + with open(os.path.join(self.path, "index"), 'rb') as index: index.seek(92*4 + key*4) addr = struct.unpack('I', index.read(4))[0] @@ -126,29 +101,16 @@ def parse(path, urls=None): # Follow the chained list in the bucket else: - entry = CacheEntry(CacheAddress(addr, path=path)) + entry = CacheEntry(CacheAddress(addr, path=self.path)) while entry.hash != hash and entry.next != 0: - entry = CacheEntry(CacheAddress(entry.next, path=path)) + entry = CacheEntry(CacheAddress(entry.next, path=self.path)) if entry.hash == hash: - cache.append(entry) - index.close() - return cache - -class ChromeCache(object): - def __init__(self,path): - self.cache = parse(path) - self.hash_cache = {} - # t = time.time() - for entry in self.cache: - key = entry.keyToStr() - if 'fanfiction.net' not in key: - continue - self.hash_cache[key] = entry - # print("======:%s"%(time.time()-t)) + return entry def get_cached_file(self,url): - if url in self.hash_cache: - entry = self.hash_cache[url] + entry = self.get_cache_entry(url) + if entry: + # entry = self.hash_cache[url] for i in range(len(entry.data)): if entry.data[i].type == CacheData.UNKNOWN: # Extracting data into a file @@ -163,157 +125,3 @@ class ChromeCache(object): data = brotli_decompress(data) return data return None - -def exportToHTML(cache, outpath): - """ - Export the cache in html - """ - - # Checking that the directory exists and is writable - if not os.path.exists(outpath): - os.makedirs(outpath) - outpath = os.path.abspath(outpath) - - index = open(os.path.join(outpath,"index.html"), 'w') - index.write("") - index.close() - -def exportTol2t(cache): - """ - Export the cache in CSV log2timeline compliant format - """ - - output = [] - output.append(["date", - "time", - "timezone", - "MACB", - "source", - "sourcetype", - "type", - "user", - "host", - "short", - "desc", - "version", - "filename", - "inode", - "notes", - "format", - "extra"]) - - for entry in cache: - date = entry.creationTime.date().strftime("%m/%d/%Y") - time = entry.creationTime.time() - # TODO get timezone - timezone = 0 - short = entry.keyToStr() - descr = "Hash: 0x%08x" % entry.hash - descr += " Usage Counter: %d" % entry.usageCounter - if entry.httpHeader != None: - if 'content-type' in entry.httpHeader.headers: - descr += " MIME: %s" % entry.httpHeader.headers['content-type'] - - output.append([date, - time, - timezone, - "MACB", - "WEBCACHE", - "Chrome Cache", - "Cache Entry", - "-", - "-", - short, - descr, - "2", - "-", - "-", - "-", - "-", - "-", - ]) - - csvOutput.csvOutput(output) diff --git a/fanficfare/chromagnon/classicalOutput.py b/fanficfare/chromagnon/classicalOutput.py deleted file mode 100644 index c8310888..00000000 --- a/fanficfare/chromagnon/classicalOutput.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -Classical Output Module -""" - -from __future__ import absolute_import -import sys - -def classicalOutput(queryResult, separator="\t"): - """ - Display the data separated by the specified separator - """ - - for line in queryResult: - for element in line: - sys.stdout.write(element) - sys.stdout.write(separator) - sys.stdout.write('\n') diff --git a/fanficfare/chromagnon/columnOutput.py b/fanficfare/chromagnon/columnOutput.py deleted file mode 100644 index 38d2f1e2..00000000 --- a/fanficfare/chromagnon/columnOutput.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -Column Output Module -""" - -from __future__ import print_function -from six.moves import range -def columnOutput(queryResult, separator=' '): - """ - Display the data in columns - """ - if len(queryResult) == 0: - return - - # Finding width of columns - size = [max([len(str(line[i])) for line in queryResult]) - for i in range(len(queryResult[0]))] - # Generating format string (without last separator) - string = (''.join(["%%-%ds%s" % (x, separator) for x in size]))\ - [:-len(separator)] - for line in queryResult: - print(string % tuple(line)) diff --git a/fanficfare/chromagnon/csvOutput.py b/fanficfare/chromagnon/csvOutput.py deleted file mode 100644 index ecc622de..00000000 --- a/fanficfare/chromagnon/csvOutput.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -CSV Output Module -""" - -from __future__ import absolute_import -import csv -import sys - -def csvOutput(queryResult, separator=',', quote='"'): - """ - Display the data according to csv format - """ - csvWriter = csv.writer(sys.stdout, delimiter=separator, quotechar=quote, - quoting=csv.QUOTE_MINIMAL) - for line in queryResult: - csvWriter.writerow(line) diff --git a/fanficfare/chromagnon/downloadParse.py b/fanficfare/chromagnon/downloadParse.py deleted file mode 100644 index a84859b9..00000000 --- a/fanficfare/chromagnon/downloadParse.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -Parse the Chrome Download Table History File -Its a SQLite3 table -""" - -from __future__ import absolute_import -from __future__ import print_function -import datetime -import sqlite3 -import sys -import six - -def parse(filename, urlLength): - """ - filename: path to the history file - urlLength: maximum url length to display - """ - - # Connecting to the DB - try: - history = sqlite3.connect(filename) - except sqlite3.Error as error: - print("==> Error while opening the history file !") - print("==> Details :", error.message) - sys.exit("==> Exiting...") - - # Retrieving all useful data - result = history.execute("SELECT id, \ - full_path, \ - url, \ - start_time, \ - received_bytes, \ - total_bytes, \ - state \ - FROM downloads;") - - output = [] - for line in result: - output.append(DownloadEntry(line, urlLength)) - return output - -class DownloadEntry(object): - """Object to store download entries""" - COLUMN_STR = {'st': "startTime", - 'p': "path", - 'u': "url", - 'rb': "receivedBytes", - 'tb': "totalBytes", - 'pt': "percentReceived", - 's': "state"} - STATE_STR = ["In Progress", - "Complete", - "Cancelled", - "Removing", - "Interrupted"] - - def __init__(self, item, urlLength): - """Parse raw input""" - self.path = item[1] - if len(item[2]) > urlLength and urlLength > 0: - self.url = item[2][0:urlLength - 3] + "..." - else: - self.url = item[2] - self.startTime = datetime.datetime(1601, 1, 1) + \ - datetime.timedelta(microseconds=\ - item[3]) - self.receivedBytes = item[4] - self.totalBytes = item[5] - self.state = DownloadEntry.STATE_STR[item[6]] - if int(item[5]) == 0: - self.percentReceived = "0%" - else: - self.percentReceived = "%d%%" % \ - int(float(item[4])/float(item[5])*100) - - def columnToStr(self, column): - """Returns column content specified by argument""" - return six.text_type(self.__getattribute__(DownloadEntry.COLUMN_STR[column])) diff --git a/fanficfare/chromagnon/historyParse.py b/fanficfare/chromagnon/historyParse.py deleted file mode 100644 index 9f331452..00000000 --- a/fanficfare/chromagnon/historyParse.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -Parse the Chrome History File -Its a SQLite3 file -""" - -from __future__ import absolute_import -from __future__ import print_function -import datetime -import re -import sqlite3 -import sys - -from . import cacheParse -import six - -def parse(filename, start, end, checkCache, cachePath, urlLength): - """ - filename: path to the history file - start: beginning of the time window - end: end of the time window - checkCache: check if each page in the history is in the cache - cachePath: path to cache directory - """ - - # Connecting to the DB - try: - history = sqlite3.connect(filename) - except sqlite3.Error as error: - print("==> Error while opening the history file !") - print("==> Details :", error.message) - sys.exit("==> Exiting...") - - reference = datetime.datetime(1601, 1, 1) - - # Retrieving all useful data - result = history.execute("SELECT visits.visit_time, \ - visits.from_visit, \ - visits.transition, \ - urls.url, \ - urls.title, \ - urls.visit_count, \ - urls.typed_count, \ - urls.last_visit_time \ - FROM urls,visits \ - WHERE urls.id=visits.url\ - AND visits.visit_time>%d\ - AND visits.visit_time<%d\ - ORDER BY visits.visit_time;"%\ - (int((start-reference).total_seconds()*1000000),\ - int((end-reference).total_seconds()*1000000)))\ - - # Parsing cache - cache = None - if checkCache: - cache = cacheParse.parse(cachePath) - - output = [] - for line in result: - output.append(HistoryEntry(line, cache, urlLength)) - return output - -class Transition(): - """Object representing transition between history pages""" - - CORE_STRING = ["Link",\ - "Typed",\ - "Auto Bookmark",\ - "Auto Subframe",\ - "Manual Subframe",\ - "Generated",\ - "Start Page",\ - "Form Submit",\ - "Reload",\ - "Keyword",\ - "Keywork Generated"] - QUALIFIER_STRING = [(0x01000000, "Forward or Back Button"), - (0x02000000, "Address Bar"), - (0x04000000, "Home Page"), - (0x10000000, "Beginning of Chain"), - (0x20000000, "End of Chain"), - (0x40000000, "Client Redirection"), - (0x80000000, "Server Redirection")] - - def __init__(self, transition): - """ - Parsing the transtion according to - content/common/page_transition_types.h - """ - self.core = transition & 0xFF - self.qualifier = transition & 0xFFFFFF00 - - def __str__(self): - string = Transition.CORE_STRING[self.core] - for mask, description in Transition.QUALIFIER_STRING: - if self.qualifier & mask != 0: - string += ", %s"%description - return string - -class HistoryEntry(object): - """Object to store database entries""" - COLUMN_STR = {'vt': "visitTime", - 'fv': "fromVisit", - 'tr': "transition", - 'u': "url", - 'tl': "title", - 'vc': "visitCount", - 'tc': "typedCount", - 'lv': "lastVisitTime", - 'cc': "inCache"} - - def __init__(self, item, cache, urlLength): - """Parse raw input""" - self.visitTime = datetime.datetime(1601, 1, 1) + \ - datetime.timedelta(microseconds=\ - item[0]) - self.fromVisit = item[1] - self.transition = Transition(item[2]) - if len(item[3]) > urlLength and urlLength > 0: - self.url = item[3][0:urlLength - 3] + "..." - else: - self.url = item[3] - self.title = item[4] - self.visitCount = item[5] - self.typedCount = item[6] - self.lastVisitTime = datetime.datetime(1601, 1, 1) + \ - datetime.timedelta(microseconds=\ - item[7]) - - # Searching in the cache if there is a copy of the page - # TODO use a hash table to search instead of heavy exhaustive search - self.inCache = False - if cache != None: - for item in cache: - if item.keyToStr() == self.url: - self.inCache = True - break - - def toStr(self): - return [six.text_type(self.visitTime),\ - six.text_type(self.fromVisit),\ - six.text_type(self.transition),\ - six.text_type(self.url),\ - six.text_type(self.title),\ - six.text_type(self.visitCount),\ - six.text_type(self.typedCount),\ - six.text_type(self.lastVisitTime)] - - def columnToStr(self, column): - """Returns column content specified by argument""" - return six.text_type(self.__getattribute__(HistoryEntry.COLUMN_STR[column])) diff --git a/fanficfare/chromagnon/jsonOutput.py b/fanficfare/chromagnon/jsonOutput.py deleted file mode 100644 index 95e0531d..00000000 --- a/fanficfare/chromagnon/jsonOutput.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -JSON Output Module -""" - -from __future__ import absolute_import -from __future__ import print_function -import json - -def jsonOutput(queryResult, separator=''): - """ - Display the data separated in JSON - """ - - print(json.JSONEncoder().encode(queryResult)) diff --git a/fanficfare/chromagnon/visitedLinks.py b/fanficfare/chromagnon/visitedLinks.py deleted file mode 100644 index 6fd894b0..00000000 --- a/fanficfare/chromagnon/visitedLinks.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, Jean-Rémy Bancel -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Chromagon Project nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL Jean-Rémy Bancel BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -Parse the Chrome Visited Links -Reverse engineered from - chrome/common/visitedlink_common.* - chrome/browser/visitedlink/visitedlink_* -""" - -from __future__ import absolute_import -import md5 -import struct -import sys -from six.moves import range - -VISITED_LINKS_MAGIC = 0x6b6e4c56; - -def isVisited(path, urls): - """ - Return the list of urls given in parameter with a boolean information - about its presence in the given visited links file - """ - output = [] - - f = open(path, 'rb') - - # Checking file type - magic = struct.unpack('I', f.read(4))[0] - if magic != VISITED_LINKS_MAGIC: - raise Exception("Invalid file") - - # Reading header values - version = struct.unpack('I', f.read(4))[0] - length = struct.unpack('I', f.read(4))[0] - usedItems = struct.unpack('I', f.read(4))[0] - - # Reading salt - salt = "" - for dummy in range(8): - salt += struct.unpack('c', f.read(1))[0] - - for url in urls: - fingerprint = md5.new() - fingerprint.update(salt) - fingerprint.update(url) - digest = fingerprint.hexdigest() - - # Inverting the result - # Why Chrome MD5 computation gives a reverse digest ? - fingerprint = 0 - for i in range(0, 16, 2): - fingerprint += int(digest[i:i+2], 16) << (i/2)*8 - key = fingerprint % length - - # The hash table uses open addressing - f.seek(key*8 + 24, 0) - while True: - finger = struct.unpack('q', f.read(8))[0] - if finger == 0: - output.append((url, False)) - break - if finger == fingerprint: - output.append((url, True)) - break - if f.tell() >= length*8 + 24: - f.seek(24) - if f.tell() == key*8 + 24: - output.append((url, False)) - break - f.close() - return output