From c0459faa4398e77211507d6a92b4696ef9555b0a Mon Sep 17 00:00:00 2001 From: sigizmund Date: Tue, 15 Dec 2009 15:23:48 +0000 Subject: [PATCH 001/482] added --- BeautifulSoup.py | 1711 ++++++++++++++++++++++++++++++++++++++++++++++ constants.py | 135 ++++ downaloder.py | 74 ++ ffa.py | 187 +++++ fictionalley.py | 75 ++ ficwad.py | 97 +++ output.py | 136 ++++ 7 files changed, 2415 insertions(+) create mode 100644 BeautifulSoup.py create mode 100644 constants.py create mode 100644 downaloder.py create mode 100644 ffa.py create mode 100644 fictionalley.py create mode 100644 ficwad.py create mode 100644 output.py diff --git a/BeautifulSoup.py b/BeautifulSoup.py new file mode 100644 index 00000000..458f08a1 --- /dev/null +++ b/BeautifulSoup.py @@ -0,0 +1,1711 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +v3.0.0 +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.0.0" +__date__ = "$Date: 2004/10/18 00:14:20 $" +__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" +__license__ = "PSF" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import types +import re +import sgmllib +from htmlentitydefs import name2codepoint + +#This code makes Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +# First, the classes that represent markup elements. 
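For orientation, a minimal usage sketch of the API this file adds (a sketch against the Beautiful Soup 3.x interface defined below; the rendered output in the comments is an assumption, not verified against this exact revision):

    # Minimal sketch, assuming the BeautifulSoup 3.x API defined in this file.
    from BeautifulSoup import BeautifulSoup

    html = "<html><p>Para 1<p>Para 2 with a <a href='http://example.com'>link</a></html>"
    soup = BeautifulSoup(html)      # parses despite the unclosed <p> tags

    first = soup.find('p')          # first matching Tag
    print first                     # expected: <p>Para 1</p>

    for a in soup.findAll('a'):     # all matching Tags
        print a['href']             # attribute access via Tag.__getitem__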
+ +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.contents.index(self) + if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: + # We're replacing this element with one of its siblings. + index = self.parent.contents.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + self.parent.contents.remove(self) + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if (isinstance(newChild, basestring) + or isinstance(newChild, unicode)) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent != None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent == self: + index = self.find(newChild) + if index and index < position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+                        break
+            if parentsNextSibling:
+                newChildsLastElement.next = parentsNextSibling
+            else:
+                newChildsLastElement.next = None
+        else:
+            nextChild = self.contents[position]
+            newChild.nextSibling = nextChild
+            if newChild.nextSibling:
+                newChild.nextSibling.previousSibling = newChild
+            newChildsLastElement.next = nextChild
+
+        if newChildsLastElement.next:
+            newChildsLastElement.next.previous = newChildsLastElement
+        self.contents.insert(position, newChild)
+
+    def findNext(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the first item that matches the given criteria and
+        appears after this Tag in the document."""
+        return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
+
+    def findAllNext(self, name=None, attrs={}, text=None, limit=None,
+                    **kwargs):
+        """Returns all items that match the given criteria and appear
+        after this Tag in the document."""
+        return self._findAll(name, attrs, text, limit, self.nextGenerator,
+                             **kwargs)
+
+    def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the closest sibling to this Tag that matches the
+        given criteria and appears after this Tag in the document."""
+        return self._findOne(self.findNextSiblings, name, attrs, text,
+                             **kwargs)
+
+    def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
+                         **kwargs):
+        """Returns the siblings of this Tag that match the given
+        criteria and appear after this Tag in the document."""
+        return self._findAll(name, attrs, text, limit,
+                             self.nextSiblingGenerator, **kwargs)
+
+    def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the first item that matches the given criteria and
+        appears before this Tag in the document."""
+        return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
+
+    def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
+                        **kwargs):
+        """Returns all items that match the given criteria and appear
+        before this Tag in the document."""
+        return self._findAll(name, attrs, text, limit, self.previousGenerator,
+                             **kwargs)
+
+    def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the closest sibling to this Tag that matches the
+        given criteria and appears before this Tag in the document."""
+        return self._findOne(self.findPreviousSiblings, name, attrs, text,
+                             **kwargs)
+
+    def findPreviousSiblings(self, name=None, attrs={}, text=None,
+                             limit=None, **kwargs):
+        """Returns the siblings of this Tag that match the given
+        criteria and appear before this Tag in the document."""
+        return self._findAll(name, attrs, text, limit,
+                             self.previousSiblingGenerator, **kwargs)
+
+    def findParent(self, name=None, attrs={}, **kwargs):
+        """Returns the closest parent of this Tag that matches the given
+        criteria."""
+        # NOTE: We can't use _findOne because findParents takes a different
+        # set of arguments.
+        r = None
+        l = self.findParents(name, attrs, 1)
+        if l:
+            r = l[0]
+        return r
+
+    def findParents(self, name=None, attrs={}, limit=None, **kwargs):
+        """Returns the parents of this Tag that match the given
+        criteria."""
+
+        return self._findAll(name, attrs, None, limit, self.parentGenerator,
+                             **kwargs)
+
+    #These methods do the real heavy lifting.
+
+    def _findOne(self, method, name, attrs, text, **kwargs):
+        r = None
+        l = method(name, attrs, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+
+    def _findAll(self, name, attrs, text, limit, generator, **kwargs):
+        "Iterates over a generator looking for things that match."
+        if isinstance(name, SoupStrainer):
+            strainer = name
+        else:
+            # Build a SoupStrainer
+            strainer = SoupStrainer(name, attrs, text, **kwargs)
+        results = ResultSet(strainer)
+        g = generator()
+        while True:
+            try:
+                i = g.next()
+            except StopIteration:
+                break
+            if i:
+                found = strainer.search(i)
+                if found:
+                    results.append(found)
+                    if limit and len(results) >= limit:
+                        break
+        return results
+
+    #These Generators can be used to navigate starting from both
+    #NavigableStrings and Tags.
+    def nextGenerator(self):
+        i = self
+        while i:
+            i = i.next
+            yield i
+
+    def nextSiblingGenerator(self):
+        i = self
+        while i:
+            i = i.nextSibling
+            yield i
+
+    def previousGenerator(self):
+        i = self
+        while i:
+            i = i.previous
+            yield i
+
+    def previousSiblingGenerator(self):
+        i = self
+        while i:
+            i = i.previousSibling
+            yield i
+
+    def parentGenerator(self):
+        i = self
+        while i:
+            i = i.parent
+            yield i
+
+    # Utility methods
+    def substituteEncoding(self, str, encoding=None):
+        encoding = encoding or "utf-8"
+        return str.replace("%SOUP-ENCODING%", encoding)
+
+    def toEncoding(self, s, encoding=None):
+        """Encodes an object to a string in some encoding, or to Unicode."""
+        if isinstance(s, unicode):
+            if encoding:
+                s = s.encode(encoding)
+        elif isinstance(s, str):
+            if encoding:
+                s = s.encode(encoding)
+            else:
+                s = unicode(s)
+        else:
+            if encoding:
+                s = self.toEncoding(str(s), encoding)
+            else:
+                s = unicode(s)
+        return s
+
+class NavigableString(unicode, PageElement):
+
+    def __getattr__(self, attr):
+        """text.string gives you text. This is for backwards
+        compatibility for Navigable*String, but for CData* it lets you
+        get the string without the CData wrapper."""
+        if attr == 'string':
+            return self
+        else:
+            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+
+    def __unicode__(self):
+        return self.__str__(None)
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        if encoding:
+            return self.encode(encoding)
+        else:
+            return self
+
+class CData(NavigableString):
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
+
+class ProcessingInstruction(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        output = self
+        if "%SOUP-ENCODING%" in output:
+            output = self.substituteEncoding(output, encoding)
+        return "<?%s?>" % self.toEncoding(output, encoding)
+
+class Comment(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<!--%s-->" % NavigableString.__str__(self, encoding)
+
+class Declaration(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<!%s>" % NavigableString.__str__(self, encoding)
+
+class Tag(PageElement):
+
+    """Represents a found HTML tag with its attributes and contents."""
+
+    def __init__(self, parser, name, attrs=None, parent=None,
+                 previous=None):
+        "Basic constructor."
+ + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. 
+
+        NOTE: since Python's HTML parser consumes whitespace, this
+        method is not certain to reproduce the whitespace present in
+        the original string."""
+
+        encodedName = self.toEncoding(self.name, encoding)
+
+        attrs = []
+        if self.attrs:
+            for key, val in self.attrs:
+                fmt = '%s="%s"'
+                if isString(val):
+                    if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
+                        val = self.substituteEncoding(val, encoding)
+                    if '"' in val:
+                        fmt = "%s='%s'"
+                        # This can't happen naturally, but it can happen
+                        # if you modify an attribute value and print it out.
+                        if "'" in val:
+                            val = val.replace("'", "&squot;")
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing:
+            close = ' /'
+        else:
+            closeTag = '</%s>' % encodedName
+
+        indentTag, indentContents = 0, 0
+        if prettyPrint:
+            indentTag = indentLevel
+            space = (' ' * (indentTag-1))
+            indentContents = indentTag + 1
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if prettyPrint:
+                s.append(space)
+            s.append('<%s%s%s>' % (encodedName, attributeString, close))
+            if prettyPrint:
+                s.append("\n")
+            s.append(contents)
+            if prettyPrint and contents and contents[-1] != "\n":
+                s.append("\n")
+            if prettyPrint and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if prettyPrint and closeTag and self.nextSibling:
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return self.__str__(encoding, True)
+
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Renders the contents of this tag as a string in the given
+        encoding. If encoding is None, returns a Unicode string."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.__str__(encoding)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
+            if text and prettyPrint:
+                text = text.strip()
+            if text:
+                if prettyPrint:
+                    s.append(" " * (indentLevel-1))
+                s.append(text)
+                if prettyPrint:
+                    s.append("\n")
+        return ''.join(s)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this
+        Tag matching the given criteria."""
+        r = None
+        l = self.findAll(name, attrs, recursive, text, 1)
+        if l:
+            r = l[0]
+        return r
+    findChild = find
+
+    def findAll(self, name=None, attrs={}, recursive=True, text=None,
+                limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria. You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'.
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findAllChildren = findAll + + #Utility methods + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.contents.append(tag) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + for i in range(0, len(self.contents)): + yield self.contents[i] + raise StopIteration + + def recursiveChildGenerator(self): + stack = [(self, 0)] + while stack: + tag, start = stack.pop() + if isinstance(tag, Tag): + for i in range(start, len(tag.contents)): + a = tag.contents[i] + yield a + if isinstance(a, Tag) and tag.contents: + if i < len(tag.contents) - 1: + stack.append((tag, i+1)) + stack.append((a, 0)) + break + raise StopIteration + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name=name + self.attrs=attrs.copy() + self.attrs.update(kwargs) + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if isList(markup) and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+        elif isinstance(markup, NavigableString) or \
+                 isString(markup):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+            result = markup != None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isString(markup):
+                markup = unicode(markup)
+            #Now we know that chunk is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif isList(matchAgainst):
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isString(markup):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source):
+        list.__init__([])
+        self.source = source
+
+# Now, some helper functions.
+
+def isList(l):
+    """Convenience method that works with all 2.x versions of Python
+    to determine whether or not something is listlike."""
+    return hasattr(l, '__iter__') \
+           or (type(l) in (types.ListType, types.TupleType))
+
+def isString(s):
+    """Convenience method that works with all 2.x versions of Python
+    to determine whether or not something is stringlike."""
+    try:
+        return isinstance(s, unicode) or isinstance(s, basestring)
+    except NameError:
+        return isinstance(s, str)
+
+def buildTagMap(default, *args):
+    """Turns a list of maps, lists, or scalars into a single map.
+    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            #It's a map. Merge it.
+            for k,v in portion.items():
+                built[k] = v
+        elif isList(portion):
+            #It's a list. Map each item to the default.
+            for k in portion:
+                built[k] = default
+        else:
+            #It's a scalar. Map it to the default.
+            built[portion] = default
+    return built
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+    """This class contains the basic parser and search code. It defines
+    a parser that knows nothing about tag behavior except for the
+    following:
+
+      You can't close a tag without closing all the tags it encloses.
+      That is, "<foo><bar></foo>" actually means
+      "<foo><bar></bar></foo>".
+
+    [Another possible explanation is "<foo><bar /></foo>", but since
+    this class defines no SELF_CLOSING_TAGS, it will never use that
+    explanation.]
+
+    This class is useful for parsing XML or made-up markup languages,
+    or when BeautifulSoup makes an assumption counter to what you were
+    expecting."""
+
+    XML_ENTITY_LIST = {}
+    for i in ["quot", "apos", "amp", "lt", "gt"]:
+        XML_ENTITY_LIST[i] = True
+
+    SELF_CLOSING_TAGS = {}
+    NESTABLE_TAGS = {}
+    RESET_NESTING_TAGS = {}
+    QUOTE_TAGS = {}
+
+    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda x: x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda x: '<!' + x.group(1) + '>')
+                      ]
+
+    ROOT_TAG_NAME = u'[document]'
+
+    HTML_ENTITIES = "html"
+    XML_ENTITIES = "xml"
+
+    def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
+                 markupMassage=True, smartQuotesTo=XML_ENTITIES,
+                 convertEntities=None, selfClosingTags=None):
+        """The Soup object is initialized as the 'root tag', and the
+        provided markup (which can be a string or a file-like object)
+        is fed into the underlying parser.
+
+        sgmllib will process most bad HTML, and the BeautifulSoup
+        class has some tricks for dealing with some HTML that kills
+        sgmllib, but Beautiful Soup can nonetheless choke or lose data
+        if your data uses self-closing tags or declarations
+        incorrectly.
+
+        By default, Beautiful Soup uses regexes to sanitize input,
+        avoiding the vast majority of these problems. If the problems
+        don't apply to you, pass in False for markupMassage, and
+        you'll get better performance.
+
+        The default parser massage techniques fix the two most common
+        instances of invalid HTML that choke sgmllib:
+
+         <br/> (No space between name of closing tag and tag close)
+         <! --comment--> (Extraneous whitespace in declaration)
+
+        You can pass in a custom list of (RE object, replace method)
+        tuples to get Beautiful Soup to scrub your input the way you
+        want."""
+
+        self.parseOnlyThese = parseOnlyThese
+        self.fromEncoding = fromEncoding
+        self.smartQuotesTo = smartQuotesTo
+        self.convertEntities = convertEntities
+        if self.convertEntities:
+            # It doesn't make sense to convert encoded characters to
+            # entities even while you're converting entities to Unicode.
+            # Just convert it all to Unicode.
+            self.smartQuotesTo = None
+        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
+        SGMLParser.__init__(self)
+
+        if hasattr(markup, 'read'):        # It's a file-type object.
+            markup = markup.read()
+        self.markup = markup
+        self.markupMassage = markupMassage
+        try:
+            self._feed()
+        except StopParsing:
+            pass
+        self.markup = None                 # The markup can now be GCed
+
+    def _feed(self, inDocumentEncoding=None):
+        # Convert the document to Unicode.
+        markup = self.markup
+        if isinstance(markup, unicode):
+            if not hasattr(self, 'originalEncoding'):
+                self.originalEncoding = None
+        else:
+            dammit = UnicodeDammit\
+                     (markup, [self.fromEncoding, inDocumentEncoding],
+                      smartQuotesTo=self.smartQuotesTo)
+            markup = dammit.unicode
+            self.originalEncoding = dammit.originalEncoding
+        if markup:
+            if self.markupMassage:
+                if not isList(self.markupMassage):
+                    self.markupMassage = self.MARKUP_MASSAGE
+                for fix, m in self.markupMassage:
+                    markup = fix.sub(m, markup)
+        self.reset()
+
+        SGMLParser.feed(self, markup)
+        # Close out any unfinished strings and close all the open tags.
+        self.endData()
+        while self.currentTag.name != self.ROOT_TAG_NAME:
+            self.popTag()
+
+    def __getattr__(self, methodName):
+        """This method routes method call requests to either the SGMLParser
+        superclass or the Tag superclass, depending on the method name."""
+        #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
+
+        if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
+               or methodName.find('do_') == 0:
+            return SGMLParser.__getattr__(self, methodName)
+        elif methodName.find('__') != 0:
+            return Tag.__getattr__(self, methodName)
+        else:
+            raise AttributeError
+
+    def isSelfClosingTag(self, name):
+        """Returns true iff the given string is the name of a
+        self-closing tag according to this parser."""
+        return self.SELF_CLOSING_TAGS.has_key(name) \
+               or self.instanceSelfClosingTags.has_key(name)
+
+    def reset(self):
+        Tag.__init__(self, self, self.ROOT_TAG_NAME)
+        self.hidden = 1
+        SGMLParser.reset(self)
+        self.currentData = []
+        self.currentTag = None
+        self.tagStack = []
+        self.quoteStack = []
+        self.pushTag(self)
+
+    def popTag(self):
+        tag = self.tagStack.pop()
+        # Tags with just one string-owning child get the child as a
+        # 'string' property, so that soup.tag.string is shorthand for
+        # soup.tag.contents[0]
+        if len(self.currentTag.contents) == 1 and \
+           isinstance(self.currentTag.contents[0], NavigableString):
+            self.currentTag.string = self.currentTag.contents[0]
+
+        #print "Pop", tag.name
+        if self.tagStack:
+            self.currentTag = self.tagStack[-1]
+        return self.currentTag
+
+    def pushTag(self, tag):
+        #print "Push", tag.name
+        if self.currentTag:
+            self.currentTag.append(tag)
+        self.tagStack.append(tag)
+        self.currentTag = self.tagStack[-1]
+
+    def endData(self, containerClass=NavigableString):
+        if self.currentData:
+            currentData = ''.join(self.currentData)
+            if not currentData.strip():
+                if '\n' in currentData:
+                    currentData = '\n'
+                else:
+                    currentData = ' '
+            self.currentData = []
+            if self.parseOnlyThese and len(self.tagStack) <= 1 and \
+                   (not self.parseOnlyThese.text or \
+                    not self.parseOnlyThese.search(currentData)):
+                return
+            o = containerClass(currentData)
+            o.setup(self.currentTag, self.previous)
+            if self.previous:
+                self.previous.next = o
+            self.previous = o
+            self.currentTag.contents.append(o)
+
+
+    def _popToTag(self, name, inclusivePop=True):
+        """Pops the tag stack up to and including the most recent
+        instance of the given tag. If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        #print "Popping to %s" % name
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+         <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
+         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers != None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers == None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s" % name
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def _toStringSubclass(self, text, subclass):
+        """Adds a certain piece of text to the tree as a NavigableString
+        subclass."""
+        self.endData()
+        self.handle_data(text)
+        self.endData(subclass)
+
+    def handle_pi(self, text):
+        """Handle a processing instruction as a ProcessingInstruction
+        object, possibly one with a %SOUP-ENCODING% slot into which an
+        encoding will be plugged later."""
+        if text[:3] == "xml":
+            text = "xml version='1.0' encoding='%SOUP-ENCODING%'"
+        self._toStringSubclass(text, ProcessingInstruction)
+
+    def handle_comment(self, text):
+        "Handle comments as Comment objects."
+        self._toStringSubclass(text, Comment)
+
+    def handle_charref(self, ref):
+        "Handle character references as data."
+        if self.convertEntities in [self.HTML_ENTITIES,
+                                    self.XML_ENTITIES]:
+            data = unichr(int(ref))
+        else:
+            data = '&#%s;' % ref
+        self.handle_data(data)
+
+    def handle_entityref(self, ref):
+        """Handle entity references as data, possibly converting known
+        HTML entity references to the corresponding Unicode
+        characters."""
+        data = None
+        if self.convertEntities == self.HTML_ENTITIES or \
+               (self.convertEntities == self.XML_ENTITIES and \
+                self.XML_ENTITY_LIST.get(ref)):
+            try:
+                data = unichr(name2codepoint[ref])
+            except KeyError:
+                pass
+        if not data:
+            data = '&%s;' % ref
+        self.handle_data(data)
+
+    def handle_decl(self, data):
+        "Handle DOCTYPEs and the like as Declaration objects."
+        self._toStringSubclass(data, Declaration)
+
+    def parse_declaration(self, i):
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            data = self.rawdata[i+9:k]
+            j = k+3
+            self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+      * Some tags have no closing tag and should be interpreted as being
+        closed as soon as they are encountered.
+
+      * The text inside some tags (i.e. 'script') may contain tags which
+        are not really part of the document and which should be parsed
+        as text, not tags. If you want to parse the text as tags, you can
+        always fetch it and parse it explicitly.
+
+      * Tag nesting rules:
+
+        Most tags can't be nested at all. For instance, the occurrence of
+        a <p> tag should implicitly close the previous <p> tag.
+
+         <p>Para1<p>Para2
+          should be transformed into:
+         <p>Para1</p><p>Para2
+
+        Some tags can be nested arbitrarily. For instance, the occurrence
+        of a <blockquote> tag should _not_ implicitly close the previous
+        <blockquote> tag.
+
+         Alice said: <blockquote>Bob said: <blockquote>Blah
+          should NOT be transformed into:
+         Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+        Some tags can be nested, but the nesting is reset by the
+        interposition of other tags. For instance, a <tr> tag should
+        implicitly close the previous <tr> tag within the same <table>,
+        but not close a <tr> tag in another table.
+
+         <table><tr>Blah<tr>Blah
+          should be transformed into:
+         <table><tr>Blah</tr><tr>Blah
+        but,
+         <tr>Blah<table><tr>Blah
+          should NOT be transformed into
+         <tr>Blah<table><tr>
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + QUOTE_TAGS = {'script': None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)") + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if getattr(self, 'declaredHTMLEncoding') or \ + (self.originalEncoding == self.fromEncoding): + # This is our second pass through the document, or + # else an encoding was specified explicitly and it + # worked. Rewrite the meta tag. + newAttr = self.CHARSET_RE.sub\ + (lambda(match):match.group(1) + + "%SOUP-ENCODING%", value) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the new information. 
+                    newCharset = match.group(3)
+                    if newCharset and newCharset != self.originalEncoding:
+                        self.declaredHTMLEncoding = newCharset
+                        self._feed(self.declaredHTMLEncoding)
+                        raise StopParsing
+        tag = self.unknown_starttag("meta", attrs)
+        if tagNeedsEncodingSubstitution:
+            tag.containsSubstitutions = True
+
+class StopParsing(Exception):
+    pass
+
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+    """The BeautifulSoup class is oriented towards skipping over
+    common HTML errors like unclosed tags. However, sometimes it makes
+    errors of its own. For instance, consider this fragment:
+
+     <b>Foo<b>Bar</b></b>
+
+    This is perfectly valid (if bizarre) HTML. However, the
+    BeautifulSoup class will implicitly close the first b tag when it
+    encounters the second 'b'. It will think the author wrote
+    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+    there's no real-world reason to bold something that's already
+    bold. When it encounters '</b></b>' it will close two more 'b'
+    tags, for a grand total of three tags closed instead of two. This
+    can throw off the rest of your document structure. The same is
+    true of a number of other tags, listed below.
+
+    It's much more common for someone to forget to close a 'b' tag
+    than to actually use nested 'b' tags, and the BeautifulSoup class
+    handles the common case. This class handles the not-so-common
+    case: where you can't believe someone wrote what they did, but
+    it's valid HTML and BeautifulSoup screwed up by assuming it
+    wouldn't be."""
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+      'big']
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
+
+    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class MinimalSoup(BeautifulSoup):
+    """The MinimalSoup class is for parsing HTML that contains
+    pathologically bad markup. It makes no assumptions about tag
+    nesting, but it does know which tags are self-closing, that
+    <script> tags contain Javascript and should not be parsed, that
+    META tags may contain encoding information, and so on."""
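To make the contrast concrete, a sketch of how the two parser classes treat the fragment discussed above (expected output shown in comments, inferred from the docstrings here rather than run against this exact revision):

    # Sketch: BeautifulSoup vs. ICantBelieveItsBeautifulSoup on nested 'b' tags.
    from BeautifulSoup import BeautifulSoup, ICantBelieveItsBeautifulSoup

    markup = "<b>Foo<b>Bar</b></b>"

    print BeautifulSoup(markup)
    # BeautifulSoup treats the second <b> as implicitly closing the first:
    # expected: <b>Foo</b><b>Bar</b>

    print ICantBelieveItsBeautifulSoup(markup)
    # 'b' is in I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS, so the
    # nesting is kept as written:
    # expected: <b>Foo<b>Bar</b></b>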
    +

    + FanFiction Downloader +

    + + +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
+ If the story requires a login and password to download (e.g. it is marked as Mature on FFA), you may need to provide your credentials to download it; otherwise just leave these fields empty.
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
+ A few things to know that will make your life substantially easier:
+ 1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+ 3. Paste a URL of the first chapter of the fanfic, not the index page.
+ 4. Fics with a single chapter are not supported (you can just copy and paste the text).
+ 5. Stories which are too long may not download correctly and the application will report a time-out error — this is a limitation currently imposed by Google AppEngine on long-running activities.
+ 6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
+ 7. You can download fanfics and store them for later by just downloading them and visiting the recent downloads section, but in the future they will be deleted after 5 days to save space.
+ 8. If the Downloader simply opens a download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
+ 9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..4987804d --- /dev/null +++ b/index.html @@ -0,0 +1,189 @@ + + + + + Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com to epub and HTML to Stanza, Kindle, Nook, Sony Reader + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier. +
      +
    • + For fictionalley.org, please paste the URL of the story's chapter list in the box, such as + this. Or the story text URL for + fictionalley.org one-shots, such + as this. +
    • +
    • + For all other supported sites, please paste the URL of the first chapter in the box. For + one-shots, the first chapter is the whole story. +
    • +
    • + Alternatively, see your personal list of previously downloaded fanfics. +
    • +
    +
    +
    + {{ error_message }} +
    + +
    + +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text +
    +
    + +

    Login and Password

    +
+
+ If the story requires a login and password to download (e.g. it is marked as Mature on FFA), you may need to provide
+ your credentials to download it; otherwise just leave these fields empty.
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + +
    + + {% else %} +
    +
    +

+ This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you
+ can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them.

    +

    Login using Google account

    +
    +
    + {% endif %} + +
+ A few things to know that will make your life substantially easier:
+ 1. First thing to know: I do not use your login and password. In fact, all I know about you is your ID – the password is verified by Google and is absolutely, totally unknown to anyone but you.
+ 2. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 3. Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, and twilighted.net. (fanficauthors.net was withdrawn as it offers native ePub functionality now.)
+ 4. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
+ 5. Paste a URL of the first chapter of the fanfic, not the index page, except for fictionalley.org.
+ 6. For fictionalley.org, you need to use the URL of the story's chapter list, such as this. Or the story text URL for fictionalley.org one-shots, such as this.
+ 7. One-shots, fics with a single chapter, are now supported.
+ 8. You can download fanfics and store them for later by just downloading them and visiting the recent downloads section.
+ 9. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep Google happy about the app not going over the storage limit).
+ 10. If the Downloader simply opens a download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
+ 11. If you see some funny characters in a downloaded Plain Text file, make sure you choose the text file encoding UTF-8 and not something else.
+ 12. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com, or, even better, write an email to our Google Group. I also encourage you to join it so you will find out about the latest updates and fixes as soon as possible.
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + +
    + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..bbed2dff --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+					all :
+					front + ">";
+			});
+
+			// Trim whitespace, otherwise indexOf won't work as expected
+			var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase();
+
+			var wrap =
+				// option or optgroup
+				!tags.indexOf("<opt") &&
+				[ 1, "<select multiple='multiple'>", "</select>" ] ||
+
+				!tags.indexOf("<leg") &&
+				[ 1, "<fieldset>", "</fieldset>" ] ||
+
+				tags.match(/^<(thead|tbody|tfoot|colg|cap)/) &&
+				[ 1, "<table>", "</table>" ] ||
+
+				!tags.indexOf("<tr") &&
+				[ 2, "<table><tbody>", "</tbody></table>" ] ||
+
+				// <thead> matched above
+				(!tags.indexOf("<td") || !tags.indexOf("<th")) &&
+				[ 3, "<table><tbody><tr>", "</tr></tbody></table>" ] ||
+
+				!tags.indexOf("<col") &&
+				[ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ] ||
+
+				// IE can't serialize <link> and <script> tags normally
+				!jQuery.support.htmlSerialize &&
+				[ 1, "div<div>", "</div>" ] ||
+
+				[ 0, "", "" ];
+
+
+
+
+
+ {{yourfile}}
+
+

    +
+ Hi, {{ nickname }}! These are the fanfics you've downloaded previously.
+
    +
    + +
    + {% for fic in fics %} +

    {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
    {{ fic.url }}

    + {% endfor %} +
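+ <!-- The loop above expects the template context to provide `fics` (items
+      exposing name, author, format and url) and a `nickname` string. -->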
    + + + + + + + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
+
+    >>> json.dumps(2 + 1j, default=encode_complex)
+    '[2.0, 1.0]'
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+"""
+__version__ = '2.0.9'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONEncoder',
+]
+
+__author__ = 'Bob Ippolito <bob@redivi.com>'
+
+from decoder import JSONDecoder
+from encoder import JSONEncoder
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8',
+    default=None,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, **kw):
+    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is false, then some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If ``check_circular`` is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance of the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a non-negative integer, then JSON array elements and object
+    members will be pretty-printed with that indent level. An indent level
+    of 0 will only insert newlines. ``None`` is the most compact representation.
+
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg.
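+
+    A short sketch, writing a compact document to a ``StringIO`` (mirroring
+    the streaming example in the module docstring)::
+
+        >>> import simplejson as json
+        >>> from StringIO import StringIO
+        >>> io = StringIO()
+        >>> json.dump([1, {'two': 3}], io, separators=(',', ':'))
+        >>> io.getvalue()
+        '[1,{"two":3}]'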
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
latin-1), then an appropriate ``encoding`` name must
+    be specified. Encodings that are not ASCII based (such as UCS-2) are
+    not allowed, and should be wrapped with
+    ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode``
+    object and passed to ``loads()``.
+
+    ``object_hook`` is an optional function that will be called with the
+    result of any object literal decode (a ``dict``). The return value of
+    ``object_hook`` will be used instead of the ``dict``. This feature
+    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
+
+    """
+    return loads(fp.read(),
+        encoding=encoding, cls=cls, object_hook=object_hook,
+        parse_float=parse_float, parse_int=parse_int,
+        parse_constant=parse_constant, **kw)
+
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, **kw):
+    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+    document) to a Python object.
+
+    If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
+    other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
+    must be specified. Encodings that are not ASCII based (such as UCS-2)
+    are not allowed and should be decoded to ``unicode`` first.
+
+    ``object_hook`` is an optional function that will be called with the
+    result of any object literal decode (a ``dict``). The return value of
+    ``object_hook`` will be used instead of the ``dict``. This feature
+    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+    ``parse_float``, if specified, will be called with the string
+    of every JSON float to be decoded. By default this is equivalent to
+    float(num_str). This can be used to use another datatype or parser
+    for JSON floats (e.g. decimal.Decimal).
+
+    ``parse_int``, if specified, will be called with the string
+    of every JSON int to be decoded. By default this is equivalent to
+    int(num_str). This can be used to use another datatype or parser
+    for JSON integers (e.g. float).
+
+    ``parse_constant``, if specified, will be called with one of the
+    following strings: -Infinity, Infinity, NaN, null, true, false.
+    This can be used to raise an exception if invalid JSON numbers
+    are encountered.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
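+
+    For instance, ``parse_int`` can be pointed at another numeric type (a
+    minimal illustration)::
+
+        >>> import simplejson as json
+        >>> json.loads('[1, 2]', parse_int=float) == [1.0, 2.0]
+        True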
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? 
zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
+    {NULL}
+};
+
+typedef struct _PyEncoderObject {
+    PyObject_HEAD
+    PyObject *markers;
+    PyObject *defaultfn;
+    PyObject *encoder;
+    PyObject *indent;
+    PyObject *key_separator;
+    PyObject *item_separator;
+    PyObject *sort_keys;
+    PyObject *skipkeys;
+    int fast_encode;
+    int allow_nan;
+} PyEncoderObject;
+
+static PyMemberDef encoder_members[] = {
+    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
+    {NULL}
+};
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+void init_speedups(void);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *const);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+
+#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+#define MIN_EXPANSION 6
+#ifdef Py_UNICODE_WIDE
+#define MAX_EXPANSION (2 * MIN_EXPANSION)
+#else
+#define MAX_EXPANSION MIN_EXPANSION
+#endif
+
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    /* PyObject to Py_ssize_t converter */
+    *size_ptr = PyInt_AsSsize_t(o);
+    if (*size_ptr == -1 && PyErr_Occurred())
+        return 0;
+    return 1;
+}
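+
+/* Note: "O&" converters used with PyArg_ParseTuple are expected to return
+   1 on success and 0 on failure, which is the convention followed above. */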
+ +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + 
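+        /* Each byte is widened to Py_UNICODE so S_CHAR() can classify it;
+           the first character that needs escaping ends this fast scan. */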
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if 
(pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 
5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. 
+ if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 
10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. 
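+ (Illustrative: for the document '{"a": 1}' scan_once_str dispatches
+ here with idx == 1 and, on success, sets *next_idx_ptr to 8.)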
+ idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
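+ (Structurally identical to _parse_object_str above; only the
+ Py_UNICODE buffer and the scanstring_unicode key reader differ.)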
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. 
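+ (Illustrative: for the document '[1, 2]' this is entered with
+ idx == 1 and, on success, sets *next_idx_ptr to 6.)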
+ 
+ Returns a new PyList
+ */
+ char *str = PyString_AS_STRING(pystr);
+ Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+ PyObject *val = NULL;
+ PyObject *rval = PyList_New(0);
+ Py_ssize_t next_idx;
+ if (rval == NULL)
+ return NULL;
+ 
+ /* skip whitespace after [ */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ 
+ /* only loop if the array is non-empty */
+ if (idx <= end_idx && str[idx] != ']') {
+ while (idx <= end_idx) {
+ 
+ /* read any JSON term and de-tuplefy the (rval, idx) */
+ val = scan_once_str(s, pystr, idx, &next_idx);
+ if (val == NULL)
+ goto bail;
+ 
+ if (PyList_Append(rval, val) == -1)
+ goto bail;
+ 
+ Py_CLEAR(val);
+ idx = next_idx;
+ 
+ /* skip whitespace between term and , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ 
+ /* bail if the array is closed or we didn't get the , delimiter */
+ if (idx > end_idx) break;
+ if (str[idx] == ']') {
+ break;
+ }
+ else if (str[idx] != ',') {
+ raise_errmsg("Expecting , delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+ 
+ /* skip whitespace after , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ }
+ }
+ 
+ /* verify that idx <= end_idx and that str[idx] == ']' */
+ if (idx > end_idx || str[idx] != ']') {
+ raise_errmsg("Expecting object", pystr, end_idx);
+ goto bail;
+ }
+ *next_idx_ptr = idx + 1;
+ return rval;
+bail:
+ Py_XDECREF(val);
+ Py_DECREF(rval);
+ return NULL;
+}
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON array from PyUnicode pystr.
+ idx is the index of the first character after the opening brace.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing brace.
+ 
+ Returns a new PyList
+ */
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ PyObject *val = NULL;
+ PyObject *rval = PyList_New(0);
+ Py_ssize_t next_idx;
+ if (rval == NULL)
+ return NULL;
+ 
+ /* skip whitespace after [ */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ 
+ /* only loop if the array is non-empty */
+ if (idx <= end_idx && str[idx] != ']') {
+ while (idx <= end_idx) {
+ 
+ /* read any JSON term */
+ val = scan_once_unicode(s, pystr, idx, &next_idx);
+ if (val == NULL)
+ goto bail;
+ 
+ if (PyList_Append(rval, val) == -1)
+ goto bail;
+ 
+ Py_CLEAR(val);
+ idx = next_idx;
+ 
+ /* skip whitespace between term and , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ 
+ /* bail if the array is closed or we didn't get the , delimiter */
+ if (idx > end_idx) break;
+ if (str[idx] == ']') {
+ break;
+ }
+ else if (str[idx] != ',') {
+ raise_errmsg("Expecting , delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+ 
+ /* skip whitespace after , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ }
+ }
+ 
+ /* verify that idx <= end_idx and that str[idx] == ']' */
+ if (idx > end_idx || str[idx] != ']') {
+ raise_errmsg("Expecting object", pystr, end_idx);
+ goto bail;
+ }
+ *next_idx_ptr = idx + 1;
+ return rval;
+bail:
+ Py_XDECREF(val);
+ Py_DECREF(rval);
+ return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON constant.
+ constant is the constant string that was found
+ ("NaN", "Infinity", "-Infinity").
+ idx is the index of the first character of the constant
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the constant.
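+ (So for the document '[NaN]' the element scan starts at idx == 1,
+ constant == "NaN", and *next_idx_ptr becomes 4.)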
+ + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */
+ if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+ is_float = 1;
+ }
+ else {
+ idx = e_start;
+ }
+ }
+ 
+ /* copy the section we determined to be a number */
+ numstr = PyString_FromStringAndSize(&str[start], idx - start);
+ if (numstr == NULL)
+ return NULL;
+ if (is_float) {
+ /* parse as a float using a fast path if available, otherwise call user defined method */
+ if (s->parse_float != (PyObject *)&PyFloat_Type) {
+ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+ }
+ else {
+ rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
+ }
+ }
+ else {
+ /* parse as an int using a fast path if available, otherwise call user defined method */
+ if (s->parse_int != (PyObject *)&PyInt_Type) {
+ rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+ }
+ else {
+ rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+ }
+ }
+ Py_DECREF(numstr);
+ *next_idx_ptr = idx;
+ return rval;
+}
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON number from PyUnicode pystr.
+ idx is the index of the first character of the number
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the number.
+ 
+ Returns a new PyObject representation of that number:
+ PyInt, PyLong, or PyFloat.
+ May return other types if parse_int or parse_float are set
+ */
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ Py_ssize_t idx = start;
+ int is_float = 0;
+ PyObject *rval;
+ PyObject *numstr;
+ 
+ /* read a sign if it's there, make sure it's not the end of the string */
+ if (str[idx] == '-') {
+ idx++;
+ if (idx > end_idx) {
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+ }
+ 
+ /* read as many integer digits as we find as long as it doesn't start with 0 */
+ if (str[idx] >= '1' && str[idx] <= '9') {
+ idx++;
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ }
+ /* if it starts with 0 we only expect one integer digit */
+ else if (str[idx] == '0') {
+ idx++;
+ }
+ /* no integer digits, error */
+ else {
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+ 
+ /* if the next char is '.' followed by a digit then read all float digits;
+ use <= end_idx so a fraction running to the end of the buffer keeps
+ its last digit, matching the PyString version above */
+ if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+ is_float = 1;
+ idx += 2;
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ }
+ 
+ /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+ if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+ Py_ssize_t e_start = idx;
+ idx++;
+ 
+ /* read an exponent sign if present */
+ if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+ 
+ /* read all digits */
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ 
+ /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. 
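+ (More precisely: after the term just read, whatever its kind.)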
+ + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 
0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON 
representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + 
empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto 
bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, 
NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
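+ 
+ For example (an illustrative doctest)::
+ 
+ >>> py_scanstring('"abc"', 1)
+ (u'abc', 5)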
+ + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" % (terminator,) + #msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + # Unicode escape sequence + esc = s[end + 1:end + 5] + next_end = end + 5 + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" + raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) + end = next_end + # Append the unescaped character + _append(char) + return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = {} + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". 
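+ # Try the direct index first and only fall back to the WHITESPACE
+ # regex when that common case misses.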
+ if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). 
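+ 
+ A minimal sketch of such a hook (the "__complex__" key convention
+ here is hypothetical, not something this module defines)::
+ 
+ def as_complex(dct):
+ if '__complex__' in dct:
+ return complex(dct['real'], dct['imag'])
+ return dct
+ 
+ JSONDecoder(object_hook=as_complex).decode(
+ '{"__complex__": true, "real": 1, "imag": 2}')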
+ 
+ ``parse_float``, if specified, will be called with the string
+ of every JSON float to be decoded. By default this is equivalent to
+ float(num_str). This can be used to parse JSON floats with another
+ datatype or parser (e.g. decimal.Decimal).
+ 
+ ``parse_int``, if specified, will be called with the string
+ of every JSON int to be decoded. By default this is equivalent to
+ int(num_str). This can be used to parse JSON integers with another
+ datatype or parser (e.g. float).
+ 
+ ``parse_constant``, if specified, will be called with one of the
+ following strings: -Infinity, Infinity, NaN.
+ This can be used to raise an exception if invalid JSON numbers
+ are encountered.
+ 
+ """
+ self.encoding = encoding
+ self.object_hook = object_hook
+ self.parse_float = parse_float or float
+ self.parse_int = parse_int or int
+ self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+ self.strict = strict
+ self.parse_object = JSONObject
+ self.parse_array = JSONArray
+ self.parse_string = scanstring
+ self.scan_once = make_scanner(self)
+ 
+ def decode(self, s, _w=WHITESPACE.match):
+ """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+ instance containing a JSON document).
+ 
+ """
+ obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+ end = _w(s, end).end()
+ if end != len(s):
+ raise ValueError(errmsg("Extra data", s, end, len(s)))
+ return obj
+ 
+ def raw_decode(self, s, idx=0):
+ """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
+ with a JSON document) and return a 2-tuple of the Python
+ representation and the index in ``s`` where the document ended.
+ 
+ This can be used to decode a JSON document from a string that may
+ have extraneous data at the end.
+ 
+ """
+ try:
+ obj, end = self.scan_once(s, idx)
+ except StopIteration:
+ raise ValueError("No JSON object could be decoded")
+ return obj, end
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
new file mode 100644
--- /dev/null
+++ b/simplejson/encoder.py
[... binary hunk for the committed simplejson/decoder.pyc and the top of
simplejson/encoder.py not recoverable; the hunk resumes inside the
surrogate-pair branch of py_encode_basestring_ascii ...]
+ s1 = 0xd800 | ((n >> 10) & 0x3ff)
+ s2 = 0xdc00 | (n & 0x3ff)
+ #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+ return '\\u%04x\\u%04x' % (s1, s2)
+ return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+ 
+ 
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
+ 
+class JSONEncoder(object):
+ """Extensible JSON encoder for Python data structures.
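+ 
+ (For instance, JSONEncoder().encode([1, "a", None]) yields
+ '[1, "a", null]'.)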
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible; otherwise it should call the superclass implementation (to
+    raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None. If
+        skipkeys is true, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped. If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such. This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level. An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be an (item_separator, key_separator)
+        tuple. The default is (', ', ': '). To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized. It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+
+        For example, to support arbitrary iterators, you could
+        implement default like this::
+
+            def default(self, o):
+                try:
+                    iterable = iter(o)
+                except TypeError:
+                    pass
+                else:
+                    return list(iterable)
+                return JSONEncoder.default(self, o)
+
+        """
+        raise TypeError(repr(o) + " is not JSON serializable")
+
+    def encode(self, o):
+        """Return a JSON string representation of a Python data structure.
+
+        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+        '{"foo": ["bar", "baz"]}'
+
+        """
+        # This is for extremely simple cases and benchmarks.
+        if isinstance(o, basestring):
+            if isinstance(o, str):
+                _encoding = self.encoding
+                if (_encoding is not None
+                        and not (_encoding == 'utf-8')):
+                    o = o.decode(_encoding)
+            if self.ensure_ascii:
+                return encode_basestring_ascii(o)
+            else:
+                return encode_basestring(o)
+        # This doesn't pass the iterator directly to ''.join() because the
+        # exceptions aren't as detailed.  The list call should be roughly
+        # equivalent to the PySequence_Fast that ''.join() would do.
+        chunks = self.iterencode(o, _one_shot=True)
+        if not isinstance(chunks, (list, tuple)):
+            chunks = list(chunks)
+        return ''.join(chunks)
+
+    def iterencode(self, o, _one_shot=False):
+        """Encode the given object and yield each string
+        representation as available.
+
+        For example::
+
+            for chunk in JSONEncoder().iterencode(bigobject):
+                mysocket.write(chunk)
+
+        """
+        if self.check_circular:
+            markers = {}
+        else:
+            markers = None
+        if self.ensure_ascii:
+            _encoder = encode_basestring_ascii
+        else:
+            _encoder = encode_basestring
+        if self.encoding != 'utf-8':
+            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+                if isinstance(o, str):
+                    o = o.decode(_encoding)
+                return _orig_encoder(o)
+
+        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
+            # Check for specials.  Note that this type of test is processor- and/or
+            # platform-specific, so do tests which don't depend on the internals.
+
+            if o != o:
+                text = 'NaN'
+            elif o == _inf:
+                text = 'Infinity'
+            elif o == _neginf:
+                text = '-Infinity'
+            else:
+                return _repr(o)
+
+            if not allow_nan:
+                raise ValueError(
+                    "Out of range float values are not JSON compliant: " +
+                    repr(o))
+
+            return text
+
+
+        if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
+            _iterencode = c_make_encoder(
+                markers, self.default, _encoder, self.indent,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, self.allow_nan)
+        else:
+            _iterencode = _make_iterencode(
+                markers, self.default, _encoder, self.indent, floatstr,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, _one_shot)
+        return _iterencode(o, 0)
+
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        ## HACK: hand-optimized bytecode; turn globals into locals
+        False=False,
+        True=True,
+        ValueError=ValueError,
+        basestring=basestring,
+        dict=dict,
+        float=float,
+        id=id,
+        int=int,
+        isinstance=isinstance,
+        list=list,
+        long=long,
+        str=str,
+        tuple=tuple,
+    ):
+
+    def _iterencode_list(lst, _current_indent_level):
+        if not lst:
+            yield '[]'
+            return
+        if markers is not None:
+            markerid = id(lst)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = lst
+        buf = '['
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        else:
+            newline_indent = None
+            separator = _item_separator
+        first = True
+        for value in lst:
+            if first:
+                first = False
+            else:
+                buf = separator
+            if isinstance(value, basestring):
+                yield buf + _encoder(value)
+            elif value is None:
+                yield buf + 'null'
+            elif value is True:
+                yield buf + 'true'
+            elif value is False:
+                yield buf + 'false'
+            elif isinstance(value, (int, long)):
+                yield buf + str(value)
+            elif isinstance(value, float):
+                yield buf + _floatstr(value)
+            else:
+                yield buf
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield ']'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode_dict(dct, _current_indent_level):
+        if not dct:
+            yield '{}'
+            return
+        if markers is not None:
+            markerid = id(dct)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = dct
+        yield '{'
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            item_separator = _item_separator + newline_indent
+            yield newline_indent
+        else:
+            newline_indent = None
+            item_separator = _item_separator
+        first = True
+        if _sort_keys:
+            items = dct.items()
+            items.sort(key=lambda kv: kv[0])
+        else:
+            items = dct.iteritems()
+        for key, value in items:
+            if isinstance(key, basestring):
+                pass
+            # JavaScript is weakly typed for these, so it makes sense to
+            # also allow them.  Many encoders seem to do something like this.
+            elif isinstance(key, float):
+                key = _floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif _skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield _encoder(key)
+            yield _key_separator
+            if isinstance(value, basestring):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, (int, long)):
+                yield str(value)
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            else:
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode(o, _current_indent_level):
+        if isinstance(o, basestring):
+            yield _encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, (int, long)):
+            yield str(o)
+        elif isinstance(o, float):
+            yield _floatstr(o)
+        elif isinstance(o, (list, tuple)):
+            for chunk in _iterencode_list(o, _current_indent_level):
+                yield chunk
+        elif isinstance(o, dict):
+            for chunk in _iterencode_dict(o, _current_indent_level):
+                yield chunk
+        else:
+            if markers is not None:
+                markerid = id(o)
+                if markerid in markers:
+                    raise ValueError("Circular reference detected")
+                markers[markerid] = o
+            o = _default(o)
+            for chunk in _iterencode(o, _current_indent_level):
+                yield chunk
+            if markers is not None:
+                del markers[markerid]
+
+    return _iterencode
diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b
GIT binary patch
literal 13938
[base85 binary payload of the compiled encoder.pyc omitted]

literal 0
HcmV?d00001
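A sketch of how the constructor options above combine in practice, through the
module-level ``dumps`` wrapper (the vendored package is assumed importable as
``simplejson``; expected outputs are shown under the defaults documented
above)::

    import simplejson

    # sort_keys with the default (', ', ': ') separators
    simplejson.dumps({'b': 1, 'a': [1, 2]}, sort_keys=True)
    # -> '{"a": [1, 2], "b": 1}'

    # (',', ':') separators give the most compact representation
    simplejson.dumps({'a': 1}, separators=(',', ':'))
    # -> '{"a":1}'

    # default() turns otherwise unserializable objects into encodable ones
    simplejson.dumps(set([1, 2]), default=list)
    # -> '[1, 2]'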
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
new file mode 100644
index 00000000..adbc6ec9
--- /dev/null
+++ b/simplejson/scanner.py
@@ -0,0 +1,65 @@
+"""JSON token scanner
+"""
+import re
+try:
+    from simplejson._speedups import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None
+
+__all__ = ['make_scanner']
+
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
+
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
+
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
+
+    return _scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef
GIT binary patch
literal 2340
[base85 binary payload of the compiled scanner.pyc omitted]

literal 0
HcmV?d00001
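The function above is what ``JSONDecoder.__init__`` installs as ``scan_once``;
a small sketch of its (value, end-index) contract, assuming the vendored
package imports as ``simplejson``::

    import simplejson.decoder

    d = simplejson.decoder.JSONDecoder()
    # dispatch happens on the first character; the returned index
    # points just past the decoded value
    d.scan_once('[1, 2.5, true]', 0)   # -> ([1, 2.5, True], 14)
    d.scan_once('null trailing', 0)    # -> (None, 4)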
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
new file mode 100644
index 00000000..17c97963
--- /dev/null
+++ b/simplejson/tests/__init__.py
@@ -0,0 +1,23 @@
+import unittest
+import doctest
+
+def additional_tests():
+    import simplejson
+    import simplejson.encoder
+    import simplejson.decoder
+    suite = unittest.TestSuite()
+    for mod in (simplejson, simplejson.encoder, simplejson.decoder):
+        suite.addTest(doctest.DocTestSuite(mod))
+    suite.addTest(doctest.DocFileSuite('../../index.rst'))
+    return suite
+
+def main():
+    suite = additional_tests()
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
+
+if __name__ == '__main__':
+    import os
+    import sys
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+    main()
diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py
new file mode 100644
index 00000000..af6463d6
--- /dev/null
+++ b/simplejson/tests/test_check_circular.py
@@ -0,0 +1,30 @@
+from unittest import TestCase
+import simplejson as json
+
+def default_iterable(obj):
+    return list(obj)
+
+class TestCheckCircular(TestCase):
+    def test_circular_dict(self):
+        dct = {}
+        dct['a'] = dct
+        self.assertRaises(ValueError, json.dumps, dct)
+
+    def test_circular_list(self):
+        lst = []
+        lst.append(lst)
+        self.assertRaises(ValueError, json.dumps, lst)
+
+    def test_circular_composite(self):
+        dct2 = {}
+        dct2['a'] = []
+        dct2['a'].append(dct2)
+        self.assertRaises(ValueError, json.dumps, dct2)
+
+    def test_circular_default(self):
+        json.dumps([set()], default=default_iterable)
+        self.assertRaises(TypeError, json.dumps, [set()])
+
+    def test_circular_off_default(self):
+        json.dumps([set()], default=default_iterable, check_circular=False)
+        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
new file mode 100644
index 00000000..1cd701d4
--- /dev/null
+++ b/simplejson/tests/test_decode.py
@@ -0,0 +1,22 @@
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDecode(TestCase):
+    def test_decimal(self):
+        rval = json.loads('1.1', parse_float=decimal.Decimal)
+        self.assert_(isinstance(rval, decimal.Decimal))
+        self.assertEquals(rval, decimal.Decimal('1.1'))
+
+    def test_float(self):
+        rval = json.loads('1', parse_int=float)
+        self.assert_(isinstance(rval, float))
+        self.assertEquals(rval, 1.0)
+
+    def test_decoder_optimizations(self):
+        # Several optimizations were made that skip over calls to
+        # the whitespace regex, so this test is designed to try and
+        # exercise the uncommon cases.  The array cases are already covered.
+        rval = json.loads('{ "key" : "value" , "k":"v" }')
+        self.assertEquals(rval, {"key":"value", "k":"v"})
diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py
new file mode 100644
index 00000000..139e42bf
--- /dev/null
+++ b/simplejson/tests/test_default.py
@@ -0,0 +1,9 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDefault(TestCase):
+    def test_default(self):
+        self.assertEquals(
+            json.dumps(type, default=repr),
+            json.dumps(repr(type)))
diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py
new file mode 100644
index 00000000..4de37cf4
--- /dev/null
+++ b/simplejson/tests/test_dump.py
@@ -0,0 +1,21 @@
+from unittest import TestCase
+from cStringIO import StringIO
+
+import simplejson as json
+
+class TestDump(TestCase):
+    def test_dump(self):
+        sio = StringIO()
+        json.dump({}, sio)
+        self.assertEquals(sio.getvalue(), '{}')
+
+    def test_dumps(self):
+        self.assertEquals(json.dumps({}), '{}')
+
+    def test_encode_truefalse(self):
+        self.assertEquals(json.dumps(
+            {True: False, False: True}, sort_keys=True),
+            '{"false": true, "true": false}')
+        self.assertEquals(json.dumps(
+            {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
+            '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py
new file mode 100644
index 00000000..7128495f
--- /dev/null
+++ b/simplejson/tests/test_encode_basestring_ascii.py
@@ -0,0 +1,38 @@
+from unittest import TestCase
+
+import simplejson.encoder
+
+CASES = [
+    (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
+    (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+    (u'controls', '"controls"'),
+    (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
+    (u' s p a c e d ', '" s p a c e d "'),
+    (u'\U0001d120', '"\\ud834\\udd20"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'),
+    (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+]
+
+class TestEncodeBaseStringAscii(TestCase):
+    def test_py_encode_basestring_ascii(self):
+        self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii)
+
+    def test_c_encode_basestring_ascii(self):
+        if not simplejson.encoder.c_encode_basestring_ascii:
+            return
+        self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii)
+
+    def _test_encode_basestring_ascii(self, encode_basestring_ascii):
+        fname = encode_basestring_ascii.__name__
+        for input_string, expect in CASES:
+            result = encode_basestring_ascii(input_string)
+            self.assertEquals(result, expect,
+                '%r != %r for %s(%r)' % (result, expect, fname, input_string))
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
new file mode 100644
index 00000000..002eea08
--- /dev/null
+++ b/simplejson/tests/test_fail.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# Fri Dec 30 18:57:26 2005
+JSONDOCS = [
+    # http://json.org/JSON_checker/test/fail1.json
+    '"A JSON payload should be an object or array, not a string."',
+    # http://json.org/JSON_checker/test/fail2.json
+    '["Unclosed array"',
+    # http://json.org/JSON_checker/test/fail3.json
+    '{unquoted_key: "keys must be quoted}',
+    # http://json.org/JSON_checker/test/fail4.json
+    '["extra comma",]',
+    # http://json.org/JSON_checker/test/fail5.json
+    '["double extra comma",,]',
+    # http://json.org/JSON_checker/test/fail6.json
+    '[ , "<-- missing value"]',
+    # http://json.org/JSON_checker/test/fail7.json
+    '["Comma after the close"],',
+    # http://json.org/JSON_checker/test/fail8.json
+    '["Extra close"]]',
+    # http://json.org/JSON_checker/test/fail9.json
+    '{"Extra comma": true,}',
+    # http://json.org/JSON_checker/test/fail10.json
+    '{"Extra value after close": true} "misplaced quoted value"',
+    # http://json.org/JSON_checker/test/fail11.json
+    '{"Illegal expression": 1 + 2}',
+    # http://json.org/JSON_checker/test/fail12.json
+    '{"Illegal invocation": alert()}',
+    # http://json.org/JSON_checker/test/fail13.json
+    '{"Numbers cannot have leading zeroes": 013}',
+    # http://json.org/JSON_checker/test/fail14.json
+    '{"Numbers cannot be hex": 0x14}',
+    # http://json.org/JSON_checker/test/fail15.json
+    '["Illegal backslash escape: \\x15"]',
+    # http://json.org/JSON_checker/test/fail16.json
+    '["Illegal backslash escape: \\\'"]',
+    # http://json.org/JSON_checker/test/fail17.json
+    '["Illegal backslash escape: \\017"]',
+    # http://json.org/JSON_checker/test/fail18.json
+    '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
+    # http://json.org/JSON_checker/test/fail19.json
+    '{"Missing colon" null}',
+    # http://json.org/JSON_checker/test/fail20.json
+    '{"Double colon":: null}',
+    # http://json.org/JSON_checker/test/fail21.json
+    '{"Comma instead of colon", null}',
+    # http://json.org/JSON_checker/test/fail22.json
+    '["Colon instead of comma": false]',
+    # http://json.org/JSON_checker/test/fail23.json
+    '["Bad value", truth]',
+    # http://json.org/JSON_checker/test/fail24.json
+    "['single quote']",
+    # http://code.google.com/p/simplejson/issues/detail?id=3
+    u'["A\u001FZ control characters in string"]',
+]
+
+SKIPS = {
+    1: "why not have a string payload?",
+    18: "spec doesn't specify any nesting limitations",
+}
+
+class TestFail(TestCase):
+    def test_failures(self):
+        for idx, doc in enumerate(JSONDOCS):
+            idx = idx + 1
+            if idx in SKIPS:
+                json.loads(doc)
+                continue
+            try:
+                json.loads(doc)
+            except ValueError:
+                pass
+            else:
+                self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py
new file mode 100644
index 00000000..1a2b98a2
--- /dev/null
+++ b/simplejson/tests/test_float.py
@@ -0,0 +1,15 @@
+import math
+from unittest import TestCase
+
+import simplejson as json
+
+class TestFloat(TestCase):
+    def test_floats(self):
+        for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]:
+            self.assertEquals(float(json.dumps(num)), num)
+            self.assertEquals(json.loads(json.dumps(num)), num)
+
+    def test_ints(self):
+        for num in [1, 1L, 1<<32, 1<<64]:
+            self.assertEquals(json.dumps(num), str(num))
+            self.assertEquals(int(json.dumps(num)), num)
diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py
new file mode 100644
index 00000000..66e19b9e
--- /dev/null
+++ b/simplejson/tests/test_indent.py
@@ -0,0 +1,41 @@
+from unittest import TestCase
+
+import simplejson as json
+import textwrap
+
+class TestIndent(TestCase):
+    def test_indent(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ],
+          [
+            "whoops"
+          ],
+          [],
+          "d-shtaeou",
+          "d-nthiouh",
+          "i-vhbjkhnth",
+          {
+            "nifty": 87
+          },
+          {
+            "field": "yes",
+            "morefield": false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
new file mode 100644
index 00000000..c3d6302d
--- /dev/null
+++ b/simplejson/tests/test_pass1.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "": 23456789012E666,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[ ],
+        "object":{ },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3

,

4 , 5 , 6 ,7 ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+class TestPass1(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
+        try:
+            json.dumps(res, allow_nan=False)
+        except ValueError:
+            pass
+        else:
+            self.fail("23456789012E666 should be out of range")
diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py
new file mode 100644
index 00000000..de4ee00b
--- /dev/null
+++ b/simplejson/tests/test_pass2.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass2.json
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+class TestPass2(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py
new file mode 100644
index 00000000..f591aba9
--- /dev/null
+++ b/simplejson/tests/test_pass3.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass3.json
+JSON = r'''
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+    }
+}
+'''
+
+class TestPass3(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py
new file mode 100644
index 00000000..97422a66
--- /dev/null
+++ b/simplejson/tests/test_recursion.py
@@ -0,0 +1,67 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class JSONTestObject:
+    pass
+
+
+class RecursiveJSONEncoder(json.JSONEncoder):
+    recurse = False
+    def default(self, o):
+        if o is JSONTestObject:
+            if self.recurse:
+                return [JSONTestObject]
+            else:
+                return 'JSONTestObject'
+        return json.JSONEncoder.default(self, o)
+
+
+class TestRecursion(TestCase):
+    def test_listrecursion(self):
+        x = []
+        x.append(x)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on list recursion")
+        x = []
+        y = [x]
+        x.append(y)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on alternating list recursion")
+        y = []
+        x = [y, y]
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_dictrecursion(self):
+        x = {}
+        x["test"] = x
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on dict recursion")
+        x = {}
+        y = {"a": x, "b": x}
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_defaultrecursion(self):
+        enc = RecursiveJSONEncoder()
+        self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
+        enc.recurse = True
+        try:
+            enc.encode(JSONTestObject)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on default recursion")
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
new file mode 100644
index 00000000..b08dec71
--- /dev/null
+++ b/simplejson/tests/test_scanstring.py
@@ -0,0 +1,111 @@
+import sys
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+import simplejson.decoder
+
+class TestScanString(TestCase):
+    def test_py_scanstring(self):
+        self._test_scanstring(simplejson.decoder.py_scanstring)
+
+    def test_c_scanstring(self):
+        if not simplejson.decoder.c_scanstring:
+            return
+        self._test_scanstring(simplejson.decoder.c_scanstring)
+
+    def _test_scanstring(self, scanstring):
+        self.assertEquals(
+            scanstring('"z\\ud834\\udd20x"', 1, None, True),
+            (u'z\U0001d120x', 16))
+
+        if sys.maxunicode == 65535:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 6))
+        else:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 5))
+
+        self.assertEquals(
+            scanstring('"\\u007b"', 1, None, True),
+            (u'{', 8))
+
+        self.assertEquals(
+            scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
+            (u'A JSON payload should be an object or array, not a string.', 60))
+
+        self.assertEquals(
+            scanstring('["Unclosed array"', 2, None, True),
+            (u'Unclosed array', 17))
+
+        self.assertEquals(
+            scanstring('["extra comma",]', 2, None, True),
+            (u'extra comma', 14))
+
+        self.assertEquals(
+            scanstring('["double extra comma",,]', 2, None, True),
+            (u'double extra comma', 21))
+
+        self.assertEquals(
+            scanstring('["Comma after the close"],', 2, None, True),
+            (u'Comma after the close', 24))
+
+        self.assertEquals(
+            scanstring('["Extra close"]]', 2, None, True),
+            (u'Extra close', 14))
+
+        self.assertEquals(
+            scanstring('{"Extra comma": true,}', 2, None, True),
+            (u'Extra comma', 14))
+
+        self.assertEquals(
+            scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True),
+            (u'Extra value after close', 26))
+
+        self.assertEquals(
+            scanstring('{"Illegal expression": 1 + 2}', 2, None, True),
+            (u'Illegal expression', 21))
+
+        self.assertEquals(
+            scanstring('{"Illegal invocation": alert()}', 2, None, True),
+            (u'Illegal invocation', 21))
+
+        self.assertEquals(
+            scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True),
+            (u'Numbers cannot have leading zeroes', 37))
+
+        self.assertEquals(
+            scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True),
+            (u'Numbers cannot be hex', 24))
+
+        self.assertEquals(
+            scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True),
+            (u'Too deep', 30))
+
+        self.assertEquals(
+            scanstring('{"Missing colon" null}', 2, None, True),
+            (u'Missing colon', 16))
+
+        self.assertEquals(
+            scanstring('{"Double colon":: null}', 2, None, True),
+            (u'Double colon', 15))
+
+        self.assertEquals(
+            scanstring('{"Comma instead of colon", null}', 2, None, True),
+            (u'Comma instead of colon', 25))
+
+        self.assertEquals(
+            scanstring('["Colon instead of comma": false]', 2, None, True),
+            (u'Colon instead of comma', 25))
+
+        self.assertEquals(
+            scanstring('["Bad value", truth]', 2, None, True),
+            (u'Bad value', 12))
+
+    def test_issue3623(self):
+        self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
+                          "xxx")
+        self.assertRaises(UnicodeDecodeError,
+                          json.encoder.encode_basestring_ascii, "xx\xff")
diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py
new file mode 100644
index 00000000..8fa0dac6
--- /dev/null
+++ b/simplejson/tests/test_separators.py
@@ -0,0 +1,42 @@
+import textwrap
+from unittest import TestCase
+
+import simplejson as json
+
+
+class TestSeparators(TestCase):
+    def test_separators(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ] ,
+          [
+            "whoops"
+          ] ,
+          [] ,
+          "d-shtaeou" ,
+          "d-nthiouh" ,
+          "i-vhbjkhnth" ,
+          {
+            "nifty" : 87
+          } ,
+          {
+            "field" : "yes" ,
+            "morefield" : false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
new file mode 100644
index 00000000..6f4384a5
--- /dev/null
+++ b/simplejson/tests/test_unicode.py
@@ -0,0 +1,64 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class TestUnicode(TestCase):
+    def test_encoding1(self):
+        encoder = json.JSONEncoder(encoding='utf-8')
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        s = u.encode('utf-8')
+        ju = encoder.encode(u)
+        js = encoder.encode(s)
+        self.assertEquals(ju, js)
+
+    def test_encoding2(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        s = u.encode('utf-8')
+        ju = json.dumps(u, encoding='utf-8')
+        js = json.dumps(s, encoding='utf-8')
+        self.assertEquals(ju, js)
+
+    def test_encoding3(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps(u)
+        self.assertEquals(j, '"\\u03b1\\u03a9"')
+
+    def test_encoding4(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps([u])
+        self.assertEquals(j, '["\\u03b1\\u03a9"]')
+
+    def test_encoding5(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps(u, ensure_ascii=False)
+        self.assertEquals(j, u'"%s"' % (u,))
+
+    def test_encoding6(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps([u], ensure_ascii=False)
+        self.assertEquals(j, u'["%s"]' % (u,))
+
+    def test_big_unicode_encode(self):
+        u = u'\U0001d120'
+        self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
+        self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
+
+    def test_big_unicode_decode(self):
+        u = u'z\U0001d120x'
+        self.assertEquals(json.loads('"' + u + '"'), u)
+        self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
+
+    def test_unicode_decode(self):
+        for i in range(0, 0xd7ff):
+            u = unichr(i)
+            s = '"\\u%04x"' % (i,)
+            self.assertEquals(json.loads(s), u)
+
+    def test_default_encoding(self):
+        self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+            {'a': u'\xe9'})
+
+    def test_unicode_preservation(self):
+        self.assertEquals(type(json.loads(u'""')), unicode)
+        self.assertEquals(type(json.loads(u'"a"')), unicode)
+        self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
\ No newline at end of file
diff --git a/simplejson/tool.py b/simplejson/tool.py
new file mode 100644
index 00000000..90443317
--- /dev/null
+++ b/simplejson/tool.py
@@ -0,0 +1,37 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import simplejson
+
+def main():
+    if len(sys.argv) == 1:
+        infile = sys.stdin
+        outfile = sys.stdout
+    elif len(sys.argv) == 2:
+        infile = open(sys.argv[1], 'rb')
+        outfile = sys.stdout
+    elif len(sys.argv) == 3:
+        infile = open(sys.argv[1], 'rb')
+        outfile = open(sys.argv[2], 'wb')
+    else:
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+    try:
+        obj = simplejson.load(infile)
+    except ValueError, e:
+        raise SystemExit(e)
+    simplejson.dump(obj, outfile, sort_keys=True, indent=4)
+    outfile.write('\n')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif
new file mode 100644
index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c
GIT binary patch
literal 10819
[base85 binary payload of static/ajax-loader.gif omitted]

literal 0
HcmV?d00001

diff --git a/static/favicon.ico b/static/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8
GIT binary patch
literal 21792
[base85 binary payload of static/favicon.ico omitted]

literal 0
HcmV?d00001
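Beyond the piped form shown in the simplejson.tool docstring above, the argv
handling in its main() also accepts one or two file arguments; a usage sketch
(in.json and out.json are hypothetical paths)::

    $ python -m simplejson.tool in.json            # pretty-print to stdout
    $ python -m simplejson.tool in.json out.json   # write result to out.json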
[the "From <hash>" and "From:" header lines of the next patch were lost in
the garbled binary data above]
Date: Mon, 22 Nov 2010 07:06:29 +0000
Subject: [PATCH 077/482] creating a special tag so we can quickly restore to
 previous version


From dd483db783e30574dde166c88cf21d7705b6c65b Mon Sep 17 00:00:00 2001
From: retiefjimm
Date: Mon, 22 Nov 2010 13:24:50 -0600
Subject: [PATCH 078/482] Move books directory down into sub dir next to CLI.

---
 {books => fanficdownloader/books}/place holder.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {books => fanficdownloader/books}/place holder.txt (100%)

diff --git a/books/place holder.txt b/fanficdownloader/books/place holder.txt
similarity index 100%
rename from books/place holder.txt
rename to fanficdownloader/books/place holder.txt

From ecdeef4b3b81a2965ef5032191321120a8c641a0 Mon Sep 17 00:00:00 2001
From: retiefjimm
Date: Mon, 22 Nov 2010 15:00:00 -0600
Subject: [PATCH 079/482] Add new adapters to main.py, update verbiage on
 index, replace a couple prints with logging.debug.

---
 fanficdownloader/fictionalley.py |   2 +-
 fanficdownloader/ficwad.py       |   2 +-
 index.html                       | 103 ++++++++++++++++++-------------
 main.py                          |  10 ++-
 4 files changed, 69 insertions(+), 48 deletions(-)

diff --git a/fanficdownloader/fictionalley.py b/fanficdownloader/fictionalley.py
index 332a08af..68cd36e4 100644
--- a/fanficdownloader/fictionalley.py
+++ b/fanficdownloader/fictionalley.py
@@ -249,7 +249,7 @@ class FictionAlley(FanfictionSiteAdapter):
 
             logging.debug('li chapterlink not found! li=%s' % li)
 
-        print('Story "%s" by %s' % (self.storyName, self.authorName))
+        logging.debug('Story "%s" by %s' % (self.storyName, self.authorName))
 
         return result
 
diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py
index 13e7a45d..058528bc 100644
--- a/fanficdownloader/ficwad.py
+++ b/fanficdownloader/ficwad.py
@@ -170,7 +170,7 @@ class FicWad(FanfictionSiteAdapter):
 
         logging.debug('self.numWords=%s' % self.numWords)
 
-        print('Story "%s" by %s' % (self.storyName, self.authorName))
+        logging.debug('Story "%s" by %s' % (self.storyName, self.authorName))
 
         result = []
         ii = 1
diff --git a/index.html b/index.html
index 4987804d..f5736129 100644
--- a/index.html
+++ b/index.html
@@ -2,7 +2,7 @@
-    Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com to epub and HTML to Stanza, Kindle, Nook, Sony Reader
+    Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org to epub and HTML to Stanza, Kindle, Nook, Sony Reader
@@ -33,23 +33,11 @@
    - Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites - much easier. - +

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

    +

    To support new features, such as including story summaries, + the URL you need to use for some sites has changed. See below for example URLs for each site.

    +

    Or see your personal list of previously downloaded fanfics.

    {{ error_message }} @@ -66,23 +54,24 @@
    -

    Login and Password

    -
- - If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide - your credentials to download it; otherwise, just leave these fields empty -
    -
    -
    Login
    -
    -
    - -
    -
    Password
    -
    -
    +

    Login and Password

    +
+ + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide + your credentials to download it; otherwise, just leave these fields empty
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    @@ -100,7 +89,41 @@ {% endif %}
    - Few things to know, which will make your life substantially easier: +
    +
    fictionalley.org +
    Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    fanfiction.net +
    Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/5192986/5/. +
    fictionpress.com +
    Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    twilighted.net +
    Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    ficwad.com +
    Use the URL of any story chapter, such as +
    http://www.ficwad.com/story/75246. +
    harrypotterfanfiction.com +
    Use the URL of the story's chapter list, such as +
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
    potionsandsnitches.net +
    Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    mediaminer.org +
    Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
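The handler behind each of these examples is chosen by plain substring matching on the pasted URL (see the main.py diff further down, where url.find(...) selects the adapter). A minimal sketch of that dispatch idea in the project's Python 2 style; the SUPPORTED_SITES table and detect_site() helper are illustrative only, not the site's actual code:

    # Illustrative sketch: mirror main.py's url.find(...) checks with a table.
    SUPPORTED_SITES = ['fictionalley.org', 'fanfiction.net', 'fictionpress.com',
                       'ficwad.com', 'harrypotterfanfiction.com',
                       'potionsandsnitches.net', 'mediaminer.org', 'twilighted.net']

    def detect_site(url):
        # Return the first supported host found in the URL, or None.
        for site in SUPPORTED_SITES:
            if url.find(site) != -1:
                return site
        return None

    print detect_site('http://www.mediaminer.org/fanfic/view_st.php/166653')
    # prints: mediaminer.org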
    + + + A few additional things to know, which will make your life substantially easier:
    1. First thing to know: I do not use your login and password. In fact, all I know about it is your ID – password @@ -111,20 +134,12 @@ — how to read fiction in Stanza or any other ebook reader.
2. - Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, and twilighted.net. - (fanficauthors.net withdrawn as they offer native ePub functionality now.) + Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org, and twilighted.net. + (fanficauthors.net and tthfanfic.org already offer native ePub downloads, so they are not included.)
    3. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
    4. -
    5. - Paste a URL of the first chapter of the fanfic, not the index page, except for fictionalley.org. -
    6. -
    7. - For fictionalley.org, you need to use the URL of the story's chapter list, such as - this. Or the story text URL for fictionalley.org - one-shots, such as this. -
    8. One-shots, fics with a single chapter, are now supported.
    9. diff --git a/main.py b/main.py index 1ca7dcb2..1ae0ac99 100644 --- a/main.py +++ b/main.py @@ -184,12 +184,18 @@ class FanfictionDownloader(webapp.RequestHandler): adapter = fictionalley.FictionAlley(url) elif url.find('ficwad') != -1: adapter = ficwad.FicWad(url) - elif url.find('fanfiction.net') != -1 or url.find('fictionpress.com') != -1: + elif url.find('fanfiction.net') != -1: adapter = ffnet.FFNet(url) + elif url.find('fictionpress.com') != -1: + adapter = fpcom.FPCom(url) elif url.find('harrypotterfanfiction.com') != -1: adapter = hpfiction.HPFiction(url) - elif url.find('twilighted') != -1 or url.find('potionsandsnitches.net') != -1: + elif url.find('twilighted.net') != -1: adapter = twilighted.Twilighted(url) + elif url.find('potionsandsnitches.net') != -1: + adapter = potionsNsnitches.PotionsNSnitches(url) + elif url.find('mediaminer.org') != -1: + adapter = mediaminer.MediaMiner(url) else: logging.debug("Bad URL detected") self.redirect('/?error=bad_url&url=' + urlEscape(url) ) From cf0d1bf09be23668c6495c9e74c321d1a07177cb Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Mon, 22 Nov 2010 20:05:58 -0600 Subject: [PATCH 080/482] Change output names for web version, make html & text output work for CLI version. --- fanficdownloader/downloader.py | 12 ++++++++++++ fanficdownloader/output.py | 13 +++++++++++-- main.py | 5 +++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index cb4ca7d4..17078430 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -62,6 +62,13 @@ class FanficLoader: self.standAlone = sa return self.standAlone + def getOverWrite(self): + return self.overWrite + + def setOverWrite(self, sa): + self.overWrite = sa + return self.overWrite + def getAdapter(): return self.adapter @@ -153,6 +160,8 @@ if __name__ == '__main__': writerClass = output.EPubFanficWriter elif bookFormat == 'html': writerClass = output.HTMLWriter + elif bookFormat == 'text': + writerClass = output.TextWriter if adapter.requiresLogin(url): print("Meow, URL %s requires you to haz been logged in! Please can I haz this datas?" 
% url) @@ -167,6 +176,9 @@ if __name__ == '__main__': loader = FanficLoader(adapter, writerClass) loader.setStandAlone(True) + if bookFormat != 'epub': + loader.setOverWrite(True) + try: loader.download() diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index af6788ce..ff5daa51 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -60,6 +60,7 @@ class TextWriter(FanficWriter): return '.txt' def __init__(self, base, adapter, inmemory=False, compress=False): + self.inmemory = inmemory self.htmlWriter = HTMLWriter(base, adapter, True, False) def writeChapter(self, index, title, text): @@ -67,9 +68,17 @@ class TextWriter(FanficWriter): def finalise(self): self.htmlWriter.finalise() - self.output = StringIO.StringIO() + self.name=self.htmlWriter.name + self.fileName = self.htmlWriter.fileName.replace(".html",".txt") + if self.inmemory: + self.output = StringIO.StringIO() + else: + self.output = open(self.fileName, 'w') + self.output.write(html2text.html2text(self.htmlWriter.output.getvalue().decode('utf-8')).encode('utf-8')) - self.name = self.htmlWriter.name + + if not self.inmemory: + self.output.close() class HTMLWriter(FanficWriter): diff --git a/main.py b/main.py index 1ae0ac99..9a9cbf31 100644 --- a/main.py +++ b/main.py @@ -230,7 +230,8 @@ class FanfictionDownloader(webapp.RequestHandler): ext = '.html' if format == 'text': ext = '.txt' - files = {makeAcceptableFilename(str(adapter.getStoryName())) + ext : StringIO.StringIO(data.decode('utf-8')) } + logging.debug(data) + files = {makeAcceptableFilename(str(adapter.getOutputName())) + ext : StringIO.StringIO(data.decode('utf-8')) } d = inMemoryZip(files) data = d.getvalue() @@ -265,7 +266,7 @@ class FanfictionDownloader(webapp.RequestHandler): fic.user = user fic.url = url fic.format = format - fic.name = self._printableVersion(adapter.getStoryName()) + fic.name = self._printableVersion(adapter.getOutputName()) fic.author = self._printableVersion(adapter.getAuthorName()) fic.blob = data From ebcce7e42d6b98581f69152caebfdcb176af2743 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Tue, 23 Nov 2010 07:15:18 +0000 Subject: [PATCH 081/482] Adding Mobi format and making final configuration changes before uploading a beta version. 
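On PATCH 080 above: TextWriter now renders through HTMLWriter and converts the buffered markup with html2text, and the CLI turns on overWrite for the non-epub formats. A minimal sketch of that HTML-to-text step, assuming a story.html previously written by HTMLWriter; the file names here are hypothetical, and html2text is the module output.py already calls:

    # -*- coding: utf-8 -*-
    # Decode the buffered HTML, convert it, and re-encode as UTF-8,
    # the same shape as the call in TextWriter.finalise() above.
    import html2text

    html_data = open('story.html').read().decode('utf-8')
    text_data = html2text.html2text(html_data)
    open('story.txt', 'w').write(text_data.encode('utf-8'))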
--- app.yaml | 8 +- fanficdownloader/downloader.py | 2 + fanficdownloader/html.py | 121 ++++++++++++ fanficdownloader/mobi.py | 344 +++++++++++++++++++++++++++++++++ fanficdownloader/output.py | 67 ++++++- 5 files changed, 533 insertions(+), 9 deletions(-) create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/mobi.py diff --git a/app.yaml b/app.yaml index 6f1ccdc8..8709ef3a 100644 --- a/app.yaml +++ b/app.yaml @@ -1,13 +1,9 @@ application: fanfictionloader -version: 2-5-5 +version: 2-6-beta runtime: python api_version: 1 handlers: -- url: /generate_mock_data - script: mocks/generate_mock_data.py - login: admin - - url: /r3m0v3r script: utils/remover.py login: admin @@ -25,7 +21,5 @@ handlers: - url: /static static_dir: static - - url: /.* script: main.py - diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index 17078430..c31f1a4b 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -160,6 +160,8 @@ if __name__ == '__main__': writerClass = output.EPubFanficWriter elif bookFormat == 'html': writerClass = output.HTMLWriter + elif bookFormat == 'mobi': + writerClass = output.MobiWriter elif bookFormat == 'text': writerClass = output.TextWriter diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py new file mode 100644 index 00000000..2c14a58d --- /dev/null +++ b/fanficdownloader/html.py @@ -0,0 +1,121 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + +import re +import sys +import StringIO +import urllib + +from BeautifulSoup import BeautifulSoup + +class HtmlProcessor: + WHITESPACE_RE = re.compile(r'\s') + # Look for + BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE) + + def __init__(self, html, unfill=0): + self.unfill = unfill + html = self._ProcessRawHtml(html) + self._soup = BeautifulSoup(html) + if self._soup.title: + self.title = self._soup.title.contents[0] + else: + self.title = None + + def _ProcessRawHtml(self, html): + new_html, count = HtmlProcessor.BAD_TAG_RE.subn('<', html) + if count: + print >>sys.stderr, 'Replaced %d bad tags' % count + return new_html + + def _StubInternalAnchors(self): + '''Replace each internal anchor with a fixed-size filepos anchor. + + Looks for every anchor with and replaces that + with . Stores anchors in self._anchor_references''' + self._anchor_references = [] + anchor_num = 0 + for anchor in self._soup.findAll('a', href=re.compile('^#')): + self._anchor_references.append((anchor_num, anchor['href'])) + del anchor['href'] + anchor['filepos'] = '%.10d' % anchor_num + anchor_num += 1 + + def _ReplaceAnchorStubs(self): + # TODO: Browsers allow extra whitespace in the href names. + assembled_text = self._soup.prettify() + del self._soup # shouldn't touch this anymore + for anchor_num, original_ref in self._anchor_references: + ref = urllib.unquote(original_ref[1:]) # remove leading '#' + # Find the position of ref in the utf-8 document. + # TODO(chatham): Using regexes and looking for name= would be better. + newpos = assembled_text.rfind(ref.encode('utf-8')) + if newpos == -1: + print >>sys.stderr, 'Could not find anchor "%s"' % original_ref + continue + newpos += len(ref) + 2 # don't point into the middle of the tag + old_filepos = 'filepos="%.10d"' % anchor_num + new_filepos = 'filepos="%.10d"' % newpos + assert assembled_text.find(old_filepos) != -1 + assembled_text = assembled_text.replace(old_filepos, new_filepos, 1) + return assembled_text + + def _FixPreTags(self): + '''Replace
       <pre> tags with HTML-ified text.'''
      +    pres = self._soup.findAll('pre')
      +    for pre in pres:
      +      pre.replaceWith(self._FixPreContents(str(pre.contents[0])))
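+    # Each <pre> is swapped for the HTML-ified text produced by
+    # _FixPreContents() below, which turns raw line breaks into explicit tags.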
      +
      +  def _FixPreContents(self, text):
      +    if self.unfill:
      +      line_splitter = '\n\n'
+      line_joiner = '<p>'
+    else:
+      line_splitter = '\n'
+      line_joiner = '<br>
      ' + lines = [] + for line in text.split(line_splitter): + lines.append(self.WHITESPACE_RE.subn(' ', line)[0]) + return line_joiner.join(lines) + + def _RemoveUnsupported(self): + '''Remove any tags which the kindle cannot handle.''' + # TODO(chatham): tags to script? + unsupported_tags = ('script', 'style') + for tag_type in unsupported_tags: + for element in self._soup.findAll(tag_type): + element.extract() + + def RenameAnchors(self, prefix): + '''Rename every internal anchor to have the given prefix, then + return the contents of the body tag.''' + for anchor in self._soup.findAll('a', href=re.compile('^#')): + anchor['href'] = '#' + prefix + anchor['href'][1:] + for a in self._soup.findAll('a'): + if a.get('name'): + a['name'] = prefix + a['name'] + + # TODO(chatham): figure out how to fix this. sometimes body comes out + # as NoneType. + content = [] + if self._soup.body is not None: + content = [unicode(c) for c in self._soup.body.contents] + return '\n'.join(content) + + def CleanHtml(self): + # TODO(chatham): fix_html_br, fix_html + self._RemoveUnsupported() + self._StubInternalAnchors() + self._FixPreTags() + return self._ReplaceAnchorStubs() + + +if __name__ == '__main__': + FILE ='/tmp/documentation.html' + #FILE = '/tmp/multipre.html' + FILE = '/tmp/view.html' + import codecs + d = open(FILE).read() + h = HtmlProcessor(d) + s = h.CleanHtml() + #print s diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py new file mode 100644 index 00000000..4facb556 --- /dev/null +++ b/fanficdownloader/mobi.py @@ -0,0 +1,344 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + + +import StringIO +import struct +import time +import random +import logging + +from html import HtmlProcessor + +# http://wiki.mobileread.com/wiki/MOBI +# http://membres.lycos.fr/microfirst/palm/pdb.html + +encoding = { + 'UTF-8' : 65001, + 'latin-1' : 1252, +} + +languages = {"en-us" : 0x0409, + "sv" : 0x041d, + "fi" : 0x000b, + "en" : 0x0009, + "en-gb" : 0x0809} + +def ToHex(s): + v = ['%.2x' % ord(c) for c in s] + return ' '.join(v) + +class _SubEntry: + def __init__(self, pos, html_data): + self.pos = pos + self.html = HtmlProcessor(html_data) + self.title = self.html.title + self._name = 'mobi_article_%d' % pos + if not self.title: + self.title = 'Article %d' % self.pos + + def TocLink(self): + return '
<a href="#%s">%.80s</a>' % (self._name, self.title)
+
+  def Anchor(self):
+    return '<a name="%s"></a>' % self._name
+
+  def Body(self):
+    return self.html.RenameAnchors(self._name + '_')
+
+class Converter:
+  def __init__(self, refresh_url=''):
+    self._header = Header()
+    self._refresh_url = refresh_url
+
+  def ConvertString(self, s):
+    out = StringIO.StringIO()
+    self._ConvertStringToFile(s, out)
+    return out.getvalue()
+
+  def ConvertStrings(self, html_strs):
+    out = StringIO.StringIO()
+    self._ConvertStringsToFile(html_strs, out)
+    return out.getvalue()
+
+  def ConvertFile(self, html_file, out_file):
+    self._ConvertStringToFile(open(html_file).read(),
+                              open(out_file, 'w'))
+
+  def ConvertFiles(self, html_files, out_file):
+    html_strs = [open(f).read() for f in html_files]
+    self._ConvertStringsToFile(html_strs, open(out_file, 'w'))
+
+  def MakeOneHTML(self, html_strs):
+    """This takes a list of HTML strings and returns a big HTML file with
+    all contents consolidated. It constructs a table of contents and adds
+    anchors within the text
+    """
+    toc_html = []
+    if self._refresh_url:
+      toc_html.append('<a href="%s">Update Reading List</a><br />
' %
+                      self._refresh_url)
+    body_html = []
+    titles = []
+
+    PAGE_BREAK = '<mbp:pagebreak>'
+    for pos, html in enumerate(html_strs):
+      entry = _SubEntry(pos+1, html)
+      titles.append(entry.title[:10])
+      toc_html.append('%s<br />
' % entry.TocLink())
+
+      # give some space between bodies of work.
+      body_html.append(PAGE_BREAK)
+      body_html.append(entry.Anchor())
+
+      body_html.append('<h1>%s</h1>
      ' % entry.title) + body_html.append(entry.Body()) + + # TODO: this title can get way too long with RSS feeds. Not sure how to fix + header = 'Bibliorize %s GMT' % time.ctime( + time.time()) + + footer = '' + all_html = header + '\n'.join(toc_html + body_html) + footer + return all_html + + def _ConvertStringsToFile(self, html_strs, out_file): + try: + tmp = self.MakeOneHTML(html_strs) + self._ConvertStringToFile(tmp, out_file) + except Exception, e: + logging.error('Error %s', e) + logging.debug('Details: %s' % html_strs) + + def _ConvertStringToFile(self, html_data, out): + html = HtmlProcessor(html_data) + data = html.CleanHtml() + records = [] + title = html.title + if title: + self._header.SetTitle(title) + record_id = 1 + for start_pos in range(0, len(data), Record.MAX_SIZE): + end = min(len(data), start_pos + Record.MAX_SIZE) + record_data = data[start_pos:end] + records.append(self._header.AddRecord(record_data, record_id)) + record_id += 1 + self._header.SetImageRecordIndex(record_id) + records[0:0] = [self._header.MobiHeader()] + + header, rec_offset = self._header.PDBHeader(len(records)) + out.write(header) + for record in records: + record.WriteHeader(out, rec_offset) + rec_offset += len(record.data) + + # Write to nuls for some reason + out.write('\0\0') + for record in records: + record.WriteData(out) + +class Record: + MAX_SIZE = 4096 + INDEX_LEN = 8 + _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader + + # TODO(chatham): Record compression doesn't look that hard. + + def __init__(self, data, record_id): + assert len(data) <= self.MAX_SIZE + self.data = data + if record_id != 0: + self._id = record_id + else: + Record._unique_id_seed += 1 + self._id = 0 + + def __repr__(self): + return 'Record: id=%d len=%d' % (self._id, len(self.data)) + + def _SetUniqueId(self): + Record._unique_id_seed += 1 + # TODO(chatham): Wraparound crap + self._id = Record._unique_id_seed + + def WriteData(self, out): + out.write(self.data) + + def WriteHeader(self, out, rec_offset): + attributes = 64 # dirty? + header = struct.pack('>IbbH', + rec_offset, + attributes, + 0, self._id) + assert len(header) == Record.INDEX_LEN + out.write(header) + +EXTH_HEADER_FIELDS = { + 'author' : 100, + 'publisher' : 101, +} + +class Header: + EPOCH_1904 = 2082844800 + + def __init__(self): + self._length = 0 + self._record_count = 0 + self._title = '2008_2_34' + self._author = 'Unknown author' + self._publisher = 'Unknown publisher' + self._first_image_index = 0 + + def SetAuthor(self, author): + self._author = author + + def SetTitle(self, title): + # TODO(chatham): Reevaluate whether this needs to be ASCII. + # maybe just do sys.setdefaultencoding('utf-8')? Problems + # appending self._title with other things. 
+ self._title = title.encode('ascii') + + def SetPublisher(self, publisher): + self._publisher = publisher + + def AddRecord(self, data, record_id): + self.max_record_size = max(Record.MAX_SIZE, len(data)) + self._record_count += 1 + self._length += len(data) + return Record(data, record_id) + + def _ReplaceWord(self, data, pos, word): + return data[:pos] + struct.pack('>I', word) + data[pos+4:] + + def PalmDocHeader(self): + compression = 1 # no compression + unused = 0 + encryption_type = 0 # no ecryption + records = self._record_count + 1 # the header record itself + palmdoc_header = struct.pack('>HHIHHHH', + compression, + unused, + self._length, + records, + Record.MAX_SIZE, + encryption_type, + unused) + assert len(palmdoc_header) == 16 + return palmdoc_header + + def PDBHeader(self, num_records): + HEADER_LEN = 32+2+2+9*4 + RECORD_INDEX_HEADER_LEN = 6 + RESOURCE_INDEX_LEN = 10 + + index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN + rec_offset = HEADER_LEN + index_len + 2 + + short_title = self._title[0:31] + attributes = 0 + version = 0 + ctime = self.EPOCH_1904 + int(time.time()) + mtime = self.EPOCH_1904 + int(time.time()) + backup_time = self.EPOCH_1904 + int(time.time()) + modnum = 0 + appinfo_offset = 0 + sort_offset = 0 + type = 'BOOK' + creator = 'MOBI' + id_seed = 36 + header = struct.pack('>32sHHII', + short_title, attributes, version, + ctime, mtime) + header += struct.pack('>IIII', backup_time, modnum, + appinfo_offset, sort_offset) + header += struct.pack('>4s4sI', + type, creator, id_seed) + next_record = 0 # not used? + header += struct.pack('>IH', next_record, num_records) + return header, rec_offset + + def _GetExthHeader(self): + # They set author, publisher, coveroffset, thumboffset + data = {'author' : self._author, + 'publisher' : self._publisher, + } + # Turn string type names into EXTH typeids. + r = [] + for key, value in data.items(): + typeid = EXTH_HEADER_FIELDS[key] + length_encoding_len = 8 + r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value) + content = ''.join(r) + + # Pad to word boundary + while len(content) % 4: + content += '\0' + TODO_mysterious = 12 + exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content + return exth + + def SetImageRecordIndex(self, idx): + self._first_image_index = idx + + def MobiHeader(self): + exth_header = self._GetExthHeader(); + palmdoc_header = self.PalmDocHeader() + + fs = 0xffffffff + + # Record 0 + header_len = 0xE4 # TODO + mobi_type = 2 # BOOK + text_encoding = encoding['UTF-8'] + unique_id = random.randint(1, 1<<32) + creator_version = 4 + reserved = '%c' % 0xff * 40 + nonbook_index = fs + full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header + language = languages['en-us'] + unused = 0 + mobi_header = struct.pack('>4sIIIII40sIIIIII', + 'MOBI', + header_len, + mobi_type, + text_encoding, + unique_id, + creator_version, + reserved, + nonbook_index, + full_name_offset, + len(self._title), + language, + fs, fs) + assert len(mobi_header) == 104 - 16 + + unknown_fields = chr(0) * 32 + drm_offset = 0 + drm_count = 0 + drm_size = 0 + drm_flags = 0 + exth_flags = 0x50 + header_end = chr(0) * 64 + mobi_header += struct.pack('>IIIIIII', + creator_version, + self._first_image_index, + fs, + unused, + fs, + unused, + exth_flags) + mobi_header += '\0' * 112 # TODO: Why this much padding? + # Set some magic offsets to be 0xFFFFFFF. 
+ for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc): + mobi_header = self._ReplaceWord(mobi_header, pos, fs) + + # 16 bytes? + padding = '\0' * 48 * 4 # why? + total_header = palmdoc_header + mobi_header + exth_header + self._title + padding + + return self.AddRecord(total_header, 0) + +if __name__ == '__main__': + import sys + m = Converter() + m.ConvertFiles(sys.argv[1:], '/tmp/test.mobi') \ No newline at end of file diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index ff5daa51..10fb6198 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -21,6 +21,7 @@ import urlparse as up import BeautifulSoup as bs import htmlentitydefs as hdefs +import mobi import zipdir import html_constants from constants import * @@ -81,6 +82,68 @@ class TextWriter(FanficWriter): self.output.close() +class MobiWriter(FanficWriter): + body = '' + + @staticmethod + def getFormatName(): + return 'mobi' + + @staticmethod + def getFormatExt(): + return '.mobi' + + def __init__(self, base, adapter, inmemory=False, compress=False): + self.basePath = base + self.storyTitle = removeEntities(adapter.getStoryName()) + self.name = makeAcceptableFilename(adapter.getOutputName()) + self.fileName = self.basePath + '/' + self.name + self.getFormatExt() + self.authorName = removeEntities(adapter.getAuthorName()) + self.adapter = adapter + self.mobi = mobi + self.inmemory = inmemory + + if not self.inmemory and os.path.exists(self.fileName): + os.remove(self.fileName) + + if self.inmemory: + self.output = StringIO.StringIO() + else: + self.output = open(self.fileName, 'w') + + self.xhtmlTemplate = string.Template(html_constants.XHTML_START) + self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START) + + def _printableVersion(self, text): + try: + d = text.decode('utf-8') + return d + except: + return text + + def writeChapter(self, index, title, text): + title = self._printableVersion(title) #title.decode('utf-8') + text = self._printableVersion(text) #text.decode('utf-8') + self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title}) + self.body = self.body + '\n' + text + + def finalise(self): + html = self.xhtmlTemplate.substitute({'title' : self.storyTitle, 'author' : self.authorName, 'body' : self.body}) + soup = bs.BeautifulSoup(html) + result = soup.__str__('utf8') + +# f = open(self.fileName, 'w') +# f.write(result) +# f.close() + + c = mobi.Converter() + mobidata = c.ConvertString(result) + + self.output.write(mobidata) + if not self.inmemory: + self.output.close() + + class HTMLWriter(FanficWriter): body = '' @@ -92,14 +155,14 @@ class HTMLWriter(FanficWriter): def getFormatExt(): return '.html' - def __init__(self, base, adapter, inmemory=False, compress=False): + def __init__(self, base, adapter, inmemory=False, compress=False, mobi = False): self.basePath = base self.storyTitle = removeEntities(adapter.getStoryName()) self.name = makeAcceptableFilename(adapter.getOutputName()) self.fileName = self.basePath + '/' + self.name + self.getFormatExt() self.authorName = removeEntities(adapter.getAuthorName()) self.adapter = adapter - + self.mobi = mobi self.inmemory = inmemory if not self.inmemory and os.path.exists(self.fileName): From f3571959df98fdb237e192863d1b256bca00d0b6 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 23 Nov 2010 12:42:33 -0600 Subject: [PATCH 082/482] Change a couple of the example story URLs. 
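For context on PATCH 081 above: MobiWriter buffers the chapters into one XHTML document and hands it to mobi.Converter, which packs the PalmDOC/MOBI records built by Header and Record. A minimal standalone sketch of that hand-off, using Converter.ConvertString() from the mobi.py added above; the story markup and output path are hypothetical:

    # -*- coding: utf-8 -*-
    # Feed a one-chapter XHTML document through mobi.Converter and save it.
    from fanficdownloader import mobi

    xhtml = ('<html><head><title>Example Story</title></head><body>'
             '<h3>Chapter 1</h3><p>Some chapter text.</p></body></html>')
    converter = mobi.Converter()
    mobidata = converter.ConvertString(xhtml)
    open('example.mobi', 'wb').write(mobidata)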
--- app.yaml | 31 + cron.yaml | 4 + css/index.css | 71 + delete_fic.py | 59 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapter.py | 229 + fanficdownloader/books/place holder.txt | 0 fanficdownloader/constants.py | 542 ++ fanficdownloader/downloader.py | 205 + fanficdownloader/ffnet.py | 358 ++ fanficdownloader/fictionalley.py | 301 ++ fanficdownloader/ficwad.py | 267 + fanficdownloader/fpcom.py | 344 ++ fanficdownloader/hpfiction.py | 280 ++ fanficdownloader/html2text.py | 452 ++ fanficdownloader/html_constants.py | 19 + fanficdownloader/mediaminer.py | 406 ++ fanficdownloader/output.py | 424 ++ fanficdownloader/potionsNsnitches.py | 367 ++ fanficdownloader/readme.txt | 10 + fanficdownloader/twilighted.py | 316 ++ fanficdownloader/twipassword.py | 4 + fanficdownloader/zipdir.py | 177 + ffstorage.py | 21 + index-ajax.html | 109 + index.html | 204 + index.yaml | 22 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ main.py | 316 ++ queue.yaml | 5 + recent.html | 69 + simplejson/__init__.py | 318 ++ simplejson/__init__.pyc | Bin 0 -> 12071 bytes simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/decoder.pyc | Bin 0 -> 11292 bytes simplejson/encoder.py | 440 ++ simplejson/encoder.pyc | Bin 0 -> 13938 bytes simplejson/scanner.py | 65 + simplejson/scanner.pyc | Bin 0 -> 2340 bytes simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes utils/remover.py | 53 + 62 files changed, 16384 insertions(+) create mode 100644 app.yaml create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 delete_fic.py create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapter.py create mode 100644 fanficdownloader/books/place holder.txt create mode 100644 fanficdownloader/constants.py create mode 100644 fanficdownloader/downloader.py create mode 100644 fanficdownloader/ffnet.py create mode 100644 fanficdownloader/fictionalley.py create mode 100644 fanficdownloader/ficwad.py create mode 100644 fanficdownloader/fpcom.py create mode 100644 fanficdownloader/hpfiction.py create mode 100644 fanficdownloader/html2text.py create mode 100644 fanficdownloader/html_constants.py create mode 100644 fanficdownloader/mediaminer.py create mode 100644 fanficdownloader/output.py create mode 100644 fanficdownloader/potionsNsnitches.py create mode 100644 fanficdownloader/readme.txt create mode 100644 fanficdownloader/twilighted.py create mode 100644 fanficdownloader/twipassword.py create mode 100644 fanficdownloader/zipdir.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create 
mode 100644 main.py create mode 100644 queue.yaml create mode 100644 recent.html create mode 100644 simplejson/__init__.py create mode 100644 simplejson/__init__.pyc create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/decoder.pyc create mode 100644 simplejson/encoder.py create mode 100644 simplejson/encoder.pyc create mode 100644 simplejson/scanner.py create mode 100644 simplejson/scanner.pyc create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 utils/remover.py diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..6f1ccdc8 --- /dev/null +++ b/app.yaml @@ -0,0 +1,31 @@ +application: fanfictionloader +version: 2-5-5 +runtime: python +api_version: 1 + +handlers: +- url: /generate_mock_data + script: mocks/generate_mock_data.py + login: admin + +- url: /r3m0v3r + script: utils/remover.py + login: admin + +- url: /r3m0v3r + script: main.py + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + + +- url: /.* + script: main.py + diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..1d9c70a0 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 3 hours \ No newline at end of file diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..f4aec452 --- /dev/null +++ b/css/index.css @@ -0,0 +1,71 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 43%; + margin-left: 23%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ + margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { + display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + font-size: small; + color: #f00; +} \ No newline at end of file diff --git a/delete_fic.py b/delete_fic.py new file mode 100644 index 00000000..73722724 --- /dev/null +++ b/delete_fic.py @@ -0,0 +1,59 @@ +import os +import cgi +import sys +import logging +import traceback +import StringIO + +from google.appengine.api import users +from 
google.appengine.ext import webapp +from google.appengine.ext.webapp import util + +from fanficdownloader.downaloder import * +from fanficdownloader.ffnet import * +from fanficdownloader.output import * + +from google.appengine.ext import db + +from fanficdownloader.zipdir import * + +from ffstorage import * + +def create_mac(user, fic_id, fic_url): + return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url))) + +def check_mac(user, fic_id, fic_url, mac): + return (create_mac(user, fic_id, fic_url) == mac) + +def create_mac_for_fic(user, fic_id): + key = db.Key(fic_id) + fanfic = db.get(key) + if fanfic.user != user: + return None + else: + return create_mac(user, key, fanfic.url) + +class DeleteFicHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect('/login') + + fic_id = self.request.get('fic_id') + fic_mac = self.request.get('key_id') + + actual_mac = create_mac_for_fic(user, fic_id) + if actual_mac != fic_mac: + self.response.out.write("Ooops") + else: + key = db.Key(fic_id) + fanfic = db.get(key) + fanfic.delete() + self.redirect('/recent') + + + fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user) + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + \ No newline at end of file diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..31ff0e5f --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +# -*- coding: utf-8 -*- + +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.0.8.1" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. 
+ myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+ break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
+                    val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing:
+            close = ' /'
+        else:
+            closeTag = '</%s>' % encodedName
+
+        indentTag, indentContents = 0, 0
+        if prettyPrint:
+            indentTag = indentLevel
+            space = (' ' * (indentTag-1))
+            indentContents = indentTag + 1
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if prettyPrint:
+                s.append(space)
+            s.append('<%s%s%s>' % (encodedName, attributeString, close))
+            if prettyPrint:
+                s.append("\n")
+            s.append(contents)
+            if prettyPrint and contents and contents[-1] != "\n":
+                s.append("\n")
+            if prettyPrint and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if prettyPrint and closeTag and self.nextSibling:
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def decompose(self):
+        """Recursively destroys the contents of this tree."""
+        self.extract()
+        if len(self.contents) == 0:
+            return
+        current = self.contents[0]
+        while current is not None:
+            next = current.next
+            if isinstance(current, Tag):
+                del current.contents[:]
+            current.parent = None
+            current.previous = None
+            current.previousSibling = None
+            current.next = None
+            current.nextSibling = None
+            current = next
+
+    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return self.__str__(encoding, True)
+
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Renders the contents of this tag as a string in the given
+        encoding. If encoding is None, returns a Unicode string."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.__str__(encoding)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
+            if text and prettyPrint:
+                text = text.strip()
+            if text:
+                if prettyPrint:
+                    s.append(" " * (indentLevel-1))
+                s.append(text)
+                if prettyPrint:
+                    s.append("\n")
+        return ''.join(s)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this Tag matching the given
+        criteria."""
+        r = None
+        l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findChild = find
+
+    def findAll(self, name=None, attrs={}, recursive=True, text=None,
+                limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria.  You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'.
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
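+        # Illustrative example (not in the original source):
+        #   SoupStrainer(text='foo').search(u'foo') reaches the branch
+        #   below and returns the matching string itself, while
+        #   SoupStrainer(text='foo').search(u'bar') returns None.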
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst is True:
+            result = markup is not None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isinstance(markup, basestring):
+                markup = unicode(markup)
+            #Now we know that chunk is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif hasattr(matchAgainst, '__iter__'): # list-like
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isinstance(markup, basestring):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source):
+        list.__init__([])
+        self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+    """Turns a list of maps, lists, or scalars into a single map.
+    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            #It's a map. Merge it.
+            for k,v in portion.items():
+                built[k] = v
+        elif hasattr(portion, '__iter__'): # is a list
+            #It's a list. Map each item to the default.
+            for k in portion:
+                built[k] = default
+        else:
+            #It's a scalar. Map it to the default.
+            built[portion] = default
+    return built
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+    """This class contains the basic parser and search code. It defines
+    a parser that knows nothing about tag behavior except for the
+    following:
+
+      You can't close a tag without closing all the tags it encloses.
+      That is, "<foo><bar></foo>" actually means
+      "<foo><bar></bar></foo>".
+
+    [Another possible explanation is "<foo><bar /></foo>", but since
+    this class defines no SELF_CLOSING_TAGS, it will never use that
+    explanation.]
+
+    This class is useful for parsing XML or made-up markup languages,
+    or when BeautifulSoup makes an assumption counter to what you were
+    expecting."""
+
+    SELF_CLOSING_TAGS = {}
+    NESTABLE_TAGS = {}
+    RESET_NESTING_TAGS = {}
+    QUOTE_TAGS = {}
+    PRESERVE_WHITESPACE_TAGS = []
+
+    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda x: x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda x: '<!' + x.group(1) + '>')
+                      ]
+
+    ROOT_TAG_NAME = u'[document]'
+
+    HTML_ENTITIES = "html"
+    XML_ENTITIES = "xml"
+    XHTML_ENTITIES = "xhtml"
+    # TODO: This only exists for backwards-compatibility
+    ALL_ENTITIES = XHTML_ENTITIES
+
+    # Used when determining whether a text node is all whitespace and
+    # can be replaced with a single space. A text node that contains
+    # fancy Unicode spaces (usually non-breaking) should be left
+    # alone.
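+    # Illustrative example (not in the original source): in endData(),
+    # u'  \n '.translate(STRIP_ASCII_SPACES) == u'', so a run of plain
+    # ASCII whitespace collapses to a single '\n' (or ' '), while a text
+    # node containing u'\xa0' (a non-breaking space) is kept verbatim.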
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
+         <br/> (No space between name of closing tag and tag close)
+         <! --Comment--> (Extraneous whitespace in declaration)
+
+        You can pass in a custom list of (RE object, replace method)
+        tuples to get Beautiful Soup to scrub your input the way you
+        want."""
+
+        self.parseOnlyThese = parseOnlyThese
+        self.fromEncoding = fromEncoding
+        self.smartQuotesTo = smartQuotesTo
+        self.convertEntities = convertEntities
+        # Set the rules for how we'll deal with the entities we
+        # encounter
+        if self.convertEntities:
+            # It doesn't make sense to convert encoded characters to
+            # entities even while you're converting entities to Unicode.
+            # Just convert it all to Unicode.
+            self.smartQuotesTo = None
+            if convertEntities == self.HTML_ENTITIES:
+                self.convertXMLEntities = False
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = True
+            elif convertEntities == self.XHTML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = False
+            elif convertEntities == self.XML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = False
+                self.escapeUnrecognizedEntities = False
+        else:
+            self.convertXMLEntities = False
+            self.convertHTMLEntities = False
+            self.escapeUnrecognizedEntities = False
+
+        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
+        SGMLParser.__init__(self)
+
+        if hasattr(markup, 'read'):  # It's a file-type object.
+            markup = markup.read()
+        self.markup = markup
+        self.markupMassage = markupMassage
+        try:
+            self._feed(isHTML=isHTML)
+        except StopParsing:
+            pass
+        self.markup = None  # The markup can now be GCed
+
+    def convert_charref(self, name):
+        """This method fixes a bug in Python's SGMLParser."""
+        try:
+            n = int(name)
+        except ValueError:
+            return
+        if not 0 <= n <= 127 : # ASCII ends at 127, not 255
+            return
+        return self.convert_codepoint(n)
+
+    def _feed(self, inDocumentEncoding=None, isHTML=False):
+        # Convert the document to Unicode.
+        markup = self.markup
+        if isinstance(markup, unicode):
+            if not hasattr(self, 'originalEncoding'):
+                self.originalEncoding = None
+        else:
+            dammit = UnicodeDammit\
+                     (markup, [self.fromEncoding, inDocumentEncoding],
+                      smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
+            markup = dammit.unicode
+            self.originalEncoding = dammit.originalEncoding
+            self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
+        if markup:
+            if self.markupMassage:
+                if not hasattr(self.markupMassage, "__iter__"):
+                    self.markupMassage = self.MARKUP_MASSAGE
+                for fix, m in self.markupMassage:
+                    markup = fix.sub(m, markup)
+                # TODO: We get rid of markupMassage so that the
+                # soup object can be deepcopied later on. Some
+                # Python installations can't copy regexes. If anyone
+                # was relying on the existence of markupMassage, this
+                # might cause problems.
+                del(self.markupMassage)
+        self.reset()
+
+        SGMLParser.feed(self, markup)
+        # Close out any unfinished strings and close all the open tags.
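+        # Illustrative example (not in the original source): for markup
+        # like '<a><b>text', the parser finishes with 'a' and 'b' still
+        # on the tag stack; the loop below pops them so the finished
+        # tree is nonetheless well-formed.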
+        self.endData()
+        while self.currentTag.name != self.ROOT_TAG_NAME:
+            self.popTag()
+
+    def __getattr__(self, methodName):
+        """This method routes method call requests to either the SGMLParser
+        superclass or the Tag superclass, depending on the method name."""
+        #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
+
+        if methodName.startswith('start_') or methodName.startswith('end_') \
+               or methodName.startswith('do_'):
+            return SGMLParser.__getattr__(self, methodName)
+        elif not methodName.startswith('__'):
+            return Tag.__getattr__(self, methodName)
+        else:
+            raise AttributeError
+
+    def isSelfClosingTag(self, name):
+        """Returns true iff the given string is the name of a
+        self-closing tag according to this parser."""
+        return self.SELF_CLOSING_TAGS.has_key(name) \
+               or self.instanceSelfClosingTags.has_key(name)
+
+    def reset(self):
+        Tag.__init__(self, self, self.ROOT_TAG_NAME)
+        self.hidden = 1
+        SGMLParser.reset(self)
+        self.currentData = []
+        self.currentTag = None
+        self.tagStack = []
+        self.quoteStack = []
+        self.pushTag(self)
+
+    def popTag(self):
+        tag = self.tagStack.pop()
+
+        #print "Pop", tag.name
+        if self.tagStack:
+            self.currentTag = self.tagStack[-1]
+        return self.currentTag
+
+    def pushTag(self, tag):
+        #print "Push", tag.name
+        if self.currentTag:
+            self.currentTag.contents.append(tag)
+        self.tagStack.append(tag)
+        self.currentTag = self.tagStack[-1]
+
+    def endData(self, containerClass=NavigableString):
+        if self.currentData:
+            currentData = u''.join(self.currentData)
+            if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
+                not set([tag.name for tag in self.tagStack]).intersection(
+                    self.PRESERVE_WHITESPACE_TAGS)):
+                if '\n' in currentData:
+                    currentData = '\n'
+                else:
+                    currentData = ' '
+            self.currentData = []
+            if self.parseOnlyThese and len(self.tagStack) <= 1 and \
+                   (not self.parseOnlyThese.text or \
+                    not self.parseOnlyThese.search(currentData)):
+                return
+            o = containerClass(currentData)
+            o.setup(self.currentTag, self.previous)
+            if self.previous:
+                self.previous.next = o
+            self.previous = o
+            self.currentTag.contents.append(o)
+
+
+    def _popToTag(self, name, inclusivePop=True):
+        """Pops the tag stack up to and including the most recent
+        instance of the given tag. If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        #print "Popping to %s" % name
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers is not None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers is None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s: %s" % (name, attrs)
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def _toStringSubclass(self, text, subclass):
+        """Adds a certain piece of text to the tree as a NavigableString
+        subclass."""
+        self.endData()
+        self.handle_data(text)
+        self.endData(subclass)
+
+    def handle_pi(self, text):
+        """Handle a processing instruction as a ProcessingInstruction
+        object, possibly one with a %SOUP-ENCODING% slot into which an
+        encoding will be plugged later."""
+        if text[:3] == "xml":
+            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
+        self._toStringSubclass(text, ProcessingInstruction)
+
+    def handle_comment(self, text):
+        "Handle comments as Comment objects."
+        self._toStringSubclass(text, Comment)
+
+    def handle_charref(self, ref):
+        "Handle character references as data."
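+        # Illustrative example (not in the original source): with
+        # convertEntities set, '&#65;' arrives here as ref='65' and is
+        # emitted as u'A'; without it, the reference is passed through
+        # unchanged as the text '&#65;'.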
+        if self.convertEntities:
+            data = unichr(int(ref))
+        else:
+            data = '&#%s;' % ref
+        self.handle_data(data)
+
+    def handle_entityref(self, ref):
+        """Handle entity references as data, possibly converting known
+        HTML and/or XML entity references to the corresponding Unicode
+        characters."""
+        data = None
+        if self.convertHTMLEntities:
+            try:
+                data = unichr(name2codepoint[ref])
+            except KeyError:
+                pass
+
+        if not data and self.convertXMLEntities:
+            data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
+
+        if not data and self.convertHTMLEntities and \
+            not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
+                # TODO: We've got a problem here. We're told this is
+                # an entity reference, but it's not an XML entity
+                # reference or an HTML entity reference. Nonetheless,
+                # the logical thing to do is to pass it through as an
+                # unrecognized entity reference.
+                #
+                # Except: when the input is "&carol;" this function
+                # will be called with input "carol". When the input is
+                # "AT&T", this function will be called with input
+                # "T". We have no way of knowing whether a semicolon
+                # was present originally, so we don't know whether
+                # this is an unknown entity or just a misplaced
+                # ampersand.
+                #
+                # The more common case is a misplaced ampersand, so I
+                # escape the ampersand and omit the trailing semicolon.
+                data = "&amp;%s" % ref
+        if not data:
+            # This case is different from the one above, because we
+            # haven't already gone through a supposedly comprehensive
+            # mapping of entities to Unicode characters. We might not
+            # have gone through any mapping at all. So the chances are
+            # very high that this is a real entity, and not a
+            # misplaced ampersand.
+            data = "&%s;" % ref
+        self.handle_data(data)
+
+    def handle_decl(self, data):
+        "Handle DOCTYPEs and the like as Declaration objects."
+        self._toStringSubclass(data, Declaration)
+
+    def parse_declaration(self, i):
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            data = self.rawdata[i+9:k]
+            j = k+3
+            self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (ie. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurrence of
+      a <p> tag should implicitly close the previous <p> tag.
+
+      <p>Para1<p>Para2
+       should be transformed into:
+      <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurrence
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+      Alice said: <blockquote>Bob said: <blockquote>Blah
+       should NOT be transformed into:
+      Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+      <table><tr>Blah<tr>Blah
+       should be transformed into:
+      <table><tr>Blah</tr><tr>Blah
+      but,
+      <tr>Blah<table><tr>Blah
+       should NOT be transformed into
+      <tr>Blah<table><tr></tr>
      Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+                        # Go through it again with the encoding information.
+                        newCharset = match.group(3)
+                        if newCharset and newCharset != self.originalEncoding:
+                            self.declaredHTMLEncoding = newCharset
+                            self._feed(self.declaredHTMLEncoding)
+                            raise StopParsing
+                    pass
+        tag = self.unknown_starttag("meta", attrs)
+        if tag and tagNeedsEncodingSubstitution:
+            tag.containsSubstitutions = True
+
+class StopParsing(Exception):
+    pass
+
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+    """The BeautifulSoup class is oriented towards skipping over
+    common HTML errors like unclosed tags. However, sometimes it makes
+    errors of its own. For instance, consider this fragment:
+
+     <b>Foo<b>Bar</b></b>
+
+    This is perfectly valid (if bizarre) HTML. However, the
+    BeautifulSoup class will implicitly close the first b tag when it
+    encounters the second 'b'. It will think the author wrote
+    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+    there's no real-world reason to bold something that's already
+    bold. When it encounters '</b></b>' it will close two more 'b'
+    tags, for a grand total of three tags closed instead of two. This
+    can throw off the rest of your document structure. The same is
+    true of a number of other tags, listed below.
+
+    It's much more common for someone to forget to close a 'b' tag
+    than to actually use nested 'b' tags, and the BeautifulSoup class
+    handles the common case. This class handles the not-so-common
+    case: where you can't believe someone wrote what they did, but
+    it's valid HTML and BeautifulSoup screwed up by assuming it
+    wouldn't be."""
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+     ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+      'big')
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
+
+    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class MinimalSoup(BeautifulSoup):
+    """The MinimalSoup class is for parsing HTML that contains
+    pathologically bad markup. It makes no assumptions about tag
+    nesting, but it does know which tags are self-closing, that
+    <script> tags contain Javascript and should not be parsed, that
+    META tags may contain encoding information, and so on.
+
+
+
+
+
+
+
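+# Illustrative usage sketch (not part of the original file; output follows
+# the behaviour described in the docstrings above):
+#
+#     from BeautifulSoup import BeautifulSoup, ICantBelieveItsBeautifulSoup
+#     str(BeautifulSoup('<b>Foo<b>Bar</b></b>'))
+#     # -> '<b>Foo</b><b>Bar</b>'  (second <b> implicitly closes the first)
+#     str(ICantBelieveItsBeautifulSoup('<b>Foo<b>Bar</b></b>'))
+#     # -> '<b>Foo<b>Bar</b></b>'  (nested <b> tags are preserved)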
      +

      + FanFiction Downloader +

      + + +
      +
      + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
      + +
      + Ebook format   +
      + +
      + +
      + + + +
      + + + +
      +
      + +

      + Login and Password +

      +
+          If the story requires a login and password (e.g. it is marked as Mature on FFA), you may need to provide
+          your credentials to download it; otherwise, just leave these fields empty.
+
      +
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      +
      + + +
      + + +
      + +
      +
+          A few things to know, which will make your life substantially easier:
+
+        1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+        2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+        3. Paste the URL of the first chapter of the fanfic, not the index page.
+        4. Fics with a single chapter are not supported (you can just copy and paste the text).
+        5. Stories which are too long may not be downloaded correctly and the application will report a time-out error — this is a limitation currently imposed by Google AppEngine on long-running activities.
+        6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
+        7. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in the future they will be deleted after 5 days to save space.
+        8. If Downloader simply opens a download file window rather than saving the fanfic and giving you a link, it means it is too large to save in the database and you need to download it straight away.
+        9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
      + Otherwise, just have fun, and if you want to say thank you — use the email above. +
      +
      + Powered by Google App Engine +

      + FanfictionLoader is a web front-end to fanficdownloader
      + Copyright © Roman Kirillov +
      + +
      + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..4ee35c4f --- /dev/null +++ b/index.html @@ -0,0 +1,204 @@ + + + + + Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org to epub and HTML to Stanza, Kindle, Nook, Sony Reader + + + + +
      +

      + FanFiction Downloader +

      + +
      + + +
      + + {{yourfile}} + + + {% if authorized %} + +
      +
      +

      Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

      +

      To support new features, such as including story summaries, + the URL you need to use for some sites has changed. See below for example URLs for each site.

      +

      Or see your personal list of previously downloaded fanfics.

      +
      +
      + {{ error_message }} +
      + +
      + +
      +
      Ebook format
      +
      + EPub + HTML + Plain Text +
      +
      + +
      +

      Login and Password

      +
+
+          If the story requires a login and password (e.g. it is marked as Mature on FFA), you may need to provide
+          your credentials to download it; otherwise, just leave these fields empty.
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      + +
      + +
      + + {% else %} +
      +
      +

+          This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you
+          can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them.
+

      +

      Login using Google account

      +
      +
      + {% endif %} + +
      +
      +
      fictionalley.org +
      Use the URL of the story's chapter list, such as +
      http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
      http://www.fictionalley.org/authors/drt/JOTP01a.html. +
      fanfiction.net +
      Use the URL of any story chapter, with or without story title such as +
      http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
      http://www.fanfiction.net/s/5192986/5/. +
      fictionpress.com +
      Use the URL of any story chapter, such as +
      http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
      http://www.fictionpress.com/s/2847338/6/. +
      twilighted.net +
      Use the URL of the start of the story, such as +
      http://twilighted.net/viewstory.php?sid=8422. +
      ficwad.com +
      Use the URL of any story chapter, such as +
      http://www.ficwad.com/story/75246. +
      harrypotterfanfiction.com +
      Use the URL of the story's chapter list, such as +
      http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
      potionsandsnitches.net +
      Use the URL of the story's chapter list, such as +
      http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
      mediaminer.org +
      Use the URL of the story's chapter list, such as +
      http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
      http://www.mediaminer.org/fanfic/view_st.php/167618. +
      + + + A few additional things to know, which will make your life substantially easier: +
+        1. First thing to know: I do not use your login and password. In fact, all I know about it is your ID – the password
+           is verified by Google and is absolutely, totally unknown to anyone but you.
+        2. Small post written by me
+           — how to read fiction in Stanza or any other ebook reader.
+        3. Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org and twilighted.net.
+           fanficauthors.net and tthfanfic.org offer native ePub functionality.
+        4. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
+        5. One-shots, fics with a single chapter, are now supported.
+        6. You can download fanfics and store them for 'later' by just downloading them and visiting the recent
+           downloads section.
+        7. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep
+           Google happy about the app not going over the storage limit).
+        8. If Downloader simply opens a download file window rather than saving the fanfic and giving you a link, it means it is
+           too large to save in the database and you need to download it straight away.
+        9. If you see some funny characters in a downloaded Plain Text file, make sure you choose the text file encoding UTF-8 and
+           not something else.
+        10. If you think that something that should work in fact doesn't, drop me a mail
+            to sigizmund@gmail.com, or, even better, write an email to
+            our Google Group. I also encourage you to join it so
+            you will find out about the latest updates and fixes as soon as possible.
      + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
      +
      + Powered by Google App Engine +

      + FanfictionLoader is a web front-end to fanficdownloader
      + Copyright © Roman Kirillov +
      + +
      + + +
      + +
      + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..bbed2dff --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
      " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+				all :
+				front + ">";
+		});
+
+		// Trim whitespace, otherwise indexOf won't work as expected
+		var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase();
+
+		var wrap =
+			// option or optgroup
+			!tags.indexOf("<opt") &&
+			[ 1, "<select multiple='multiple'>", "</select>" ] ||
+
+			!tags.indexOf("<leg") &&
+			[ 1, "<fieldset>", "</fieldset>" ] ||
+
+			tags.match(/^<(thead|tbody|tfoot|colg|cap)/) &&
+			[ 1, "<table>", "</table>" ] ||
+
+			!tags.indexOf("<tr") &&
+			[ 2, "<table><tbody>", "</tbody></table>" ] ||
+
+			// <thead> matched above
+			(!tags.indexOf("<td") || !tags.indexOf("<th")) &&
+			[ 3, "<table><tbody><tr>", "</tr></tbody></table>" ] ||
+
+			!tags.indexOf("<col") &&
+			[ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ] ||
+
+			// IE can't serialize <link> and <script> tags normally
+
+
+
+
+
+ {{yourfile}}
+
+

      +
+ Hi, {{ nickname }}! Here are the fanfics you've downloaded previously.
      +
      + +
      + {% for fic in fics %} +

      {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
      {{ fic.url }}

      + {% endfor %} +
      + + + + + +
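The surrounding HTML of this file did not survive extraction, but the remaining placeholders show a Django-style template: ``{{ nickname }}`` is substituted into the greeting, and the ``{% for fic in fics %}`` block is repeated once per downloaded fic. A minimal editorial sketch of how such a template is filled in; the engine choice (a Python 2-era Django), the ``Fic`` fields, and all values are illustrative assumptions, not taken from the patch:

    # Sketch only: renders placeholders like the ones in the template above.
    from django.conf import settings
    settings.configure()  # standalone Template use needs configured settings

    from django.template import Context, Template

    class Fic(object):
        """Illustrative stand-in carrying the fields the template reads."""
        def __init__(self, name, author, format, url):
            self.name, self.author = name, author
            self.format, self.url = format, url

    tmpl = Template(
        u"Hi, {{ nickname }}! Here are the fanfics you've downloaded "
        u"previously. {% for fic in fics %}{{ fic.name }} by "
        u"{{ fic.author }} ({{ fic.format }}) {{ fic.url }} {% endfor %}")

    print tmpl.render(Context({
        'nickname': u'reader',
        'fics': [Fic(u'An Example Story', u'Some Author', u'epub',
                     u'http://example.com/story/1')],
    }))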
+
+
+
+
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
new file mode 100644
index 00000000..d5b4d399
--- /dev/null
+++ b/simplejson/__init__.py
@@ -0,0 +1,318 @@
+r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+:mod:`simplejson` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
+
+Encoding basic Python object hierarchies::
+
+    >>> import simplejson as json
+    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+    >>> print json.dumps("\"foo\bar")
+    "\"foo\bar"
+    >>> print json.dumps(u'\u1234')
+    "\u1234"
+    >>> print json.dumps('\\')
+    "\\"
+    >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+    {"a": 0, "b": 0, "c": 0}
+    >>> from StringIO import StringIO
+    >>> io = StringIO()
+    >>> json.dump(['streaming API'], io)
+    >>> io.getvalue()
+    '["streaming API"]'
+
+Compact encoding::
+
+    >>> import simplejson as json
+    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+    '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+    >>> import simplejson as json
+    >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
+    >>> print '\n'.join([l.rstrip() for l in s.splitlines()])
+    {
+        "4": 5,
+        "6": 7
+    }
+
+Decoding JSON::
+
+    >>> import simplejson as json
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
+    >>> from StringIO import StringIO
+    >>> io = StringIO('["streaming API"]')
+    >>> json.load(io)[0] == 'streaming API'
+    True
+
+Specializing JSON object decoding::
+
+    >>> import simplejson as json
+    >>> def as_complex(dct):
+    ...     if '__complex__' in dct:
+    ...         return complex(dct['real'], dct['imag'])
+    ...     return dct
+    ...
+    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...     object_hook=as_complex)
+    (1+2j)
+    >>> import decimal
+    >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1')
+    True
+
+Specializing JSON object encoding::
+
+    >>> import simplejson as json
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError(repr(obj) + " is not JSON serializable")
+    ...
+    >>> json.dumps(2 + 1j, default=encode_complex)
+    '[2.0, 1.0]'
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+"""
+__version__ = '2.0.9'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONEncoder',
+]
+
+__author__ = 'Bob Ippolito <bob@redivi.com>'
+
+from decoder import JSONDecoder
+from encoder import JSONEncoder
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8',
+    default=None,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, **kw):
+    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is false, then some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If ``check_circular`` is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance with the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a non-negative integer, then JSON array elements and object
+    members will be pretty-printed with that indent level. An indent level
+    of 0 will only insert newlines. ``None`` is the most compact representation.
+
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg.
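An illustrative doctest-style sketch of the ``skipkeys`` behavior described above (an editorial addition, not part of the upstream docstring):

    >>> import simplejson as json
    >>> from StringIO import StringIO
    >>> io = StringIO()
    >>> # with skipkeys=True the tuple key is dropped instead of raising
    >>> json.dump({'a': 1, (1, 2): 'ignored'}, io, skipkeys=True)
    >>> io.getvalue()
    '{"a": 1}'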
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
+    be specified. Encodings that are not ASCII based (such as UCS-2) are
+    not allowed, and should be wrapped with
+    ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode``
+    object and passed to ``loads()``
+
+    ``object_hook`` is an optional function that will be called with the
+    result of any object literal decode (a ``dict``). The return value of
+    ``object_hook`` will be used instead of the ``dict``. This feature
+    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
+
+    """
+    return loads(fp.read(),
+        encoding=encoding, cls=cls, object_hook=object_hook,
+        parse_float=parse_float, parse_int=parse_int,
+        parse_constant=parse_constant, **kw)
+
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, **kw):
+    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+    document) to a Python object.
+
+    If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
+    other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
+    must be specified. Encodings that are not ASCII based (such as UCS-2)
+    are not allowed and should be decoded to ``unicode`` first.
+
+    ``object_hook`` is an optional function that will be called with the
+    result of any object literal decode (a ``dict``). The return value of
+    ``object_hook`` will be used instead of the ``dict``. This feature
+    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+    ``parse_float``, if specified, will be called with the string
+    of every JSON float to be decoded. By default this is equivalent to
+    float(num_str). This can be used to use another datatype or parser
+    for JSON floats (e.g. decimal.Decimal).
+
+    ``parse_int``, if specified, will be called with the string
+    of every JSON int to be decoded. By default this is equivalent to
+    int(num_str). This can be used to use another datatype or parser
+    for JSON integers (e.g. float).
+
+    ``parse_constant``, if specified, will be called with one of the
+    following strings: -Infinity, Infinity, NaN, null, true, false.
+    This can be used to raise an exception if invalid JSON numbers
+    are encountered.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
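A short doctest-style sketch of the ``parse_int`` hook described above, using the ``float`` parser the docstring itself suggests (an editorial addition, not part of the upstream docstring):

    >>> import simplejson as json
    >>> json.loads('[1, 2, 3]', parse_int=float)
    [1.0, 2.0, 3.0]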
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? 
zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, 
+    {NULL}
+};
+
+typedef struct _PyEncoderObject {
+    PyObject_HEAD
+    PyObject *markers;
+    PyObject *defaultfn;
+    PyObject *encoder;
+    PyObject *indent;
+    PyObject *key_separator;
+    PyObject *item_separator;
+    PyObject *sort_keys;
+    PyObject *skipkeys;
+    int fast_encode;
+    int allow_nan;
+} PyEncoderObject;
+
+static PyMemberDef encoder_members[] = {
+    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
+    {NULL}
+};
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+void init_speedups(void);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *const);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+
+#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+#define MIN_EXPANSION 6
+#ifdef Py_UNICODE_WIDE
+#define MAX_EXPANSION (2 * MIN_EXPANSION)
+#else
+#define MAX_EXPANSION MIN_EXPANSION
+#endif
+
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    /* PyObject to Py_ssize_t converter */
+    *size_ptr = PyInt_AsSsize_t(o);
+    if (*size_ptr == -1 && PyErr_Occurred())
+        return 0;
+    return 1;
+}
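+/* Editor's note: the converter above follows the PyArg_ParseTuple "O&"
+ * protocol -- fill *size_ptr and return 1 on success, return 0 on
+ * failure -- which is how py_scanstring() below consumes it:
+ *
+ *     PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr,
+ *                      _convertPyInt_AsSsize_t, &end, &encoding, &strict)
+ */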
+ +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + 
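+        /* Editor's note: scan for the first character that needs
+         * escaping; everything before it is plain ASCII and is copied
+         * into the output in a single memcpy() further down. */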
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if 
(pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 
5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. 
+ if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 
10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. 
+ idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. 
+ + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON constant from PyString pystr. + constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. 
+ + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
           if not, backtrack */
+        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyString_FromStringAndSize(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
+        }
+    }
+    else {
+        /* parse as an int using a fast path if available, otherwise call user defined method */
+        if (s->parse_int != (PyObject *)&PyInt_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+        }
+        else {
+            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+        }
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON number from PyUnicode pystr.
+    idx is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the number.
+
+    Returns a new PyObject representation of that number:
+    PyInt, PyLong, or PyFloat.
+    May return other types if parse_int or parse_float are set
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        idx++;
+        if (idx > end_idx) {
+            PyErr_SetNone(PyExc_StopIteration);
+            return NULL;
+        }
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float.
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. 
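+
+    Dispatches on the first character: '"' starts a string, '{' an object,
+    '[' an array, 'n'/'t'/'f' one of null/true/false, 'N'/'I' one of the
+    non-standard constants, and '-' either -Infinity or a negative number.
+    Anything else falls through to the number matcher.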
+ + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
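+       A number is the only production left; an unrecognized character will
+       surface as a StopIteration from the number matcher.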
*/ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 
0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON 
representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + 
empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto 
bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, 
NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
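+
+    For example::
+
+        >>> py_scanstring('"hello"', 1)
+        (u'hello', 7)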
+ + Returns a tuple of the decoded string and the index of the character in s + after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + # Content is contains zero or more unescaped string characters + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + # Terminator is the end of string, a literal control character, + # or a backslash denoting that an escape sequence follows + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character %r at" % (terminator,) + #msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + # If not a unicode escape sequence, must be in the lookup table + if esc != 'u': + try: + char = _b[esc] + except KeyError: + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + # Unicode escape sequence + esc = s[end + 1:end + 5] + next_end = end + 5 + if len(esc) != 4: + msg = "Invalid \\uXXXX escape" + raise ValueError(errmsg(msg, s, end)) + uni = int(esc, 16) + # Check for surrogate pair on UCS-4 systems + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError(errmsg(msg, s, end)) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError(errmsg(msg, s, end)) + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + char = unichr(uni) + end = next_end + # Append the unescaped character + _append(char) + return u''.join(chunks), end + + +# Use speedup if available +scanstring = c_scanstring or py_scanstring + +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' + +def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = {} + # Use a slice to prevent IndexError from being raised, the following + # check will raise a more specific ValueError if the string is empty + nextchar = s[end:end + 1] + # Normally we expect nextchar == '"' + if nextchar != '"': + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + while True: + key, end = scanstring(s, end, encoding, strict) + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". 
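+        # When the colon immediately follows the key, as in '{"key":1}',
+        # the first comparison already sees ':' and the whitespace regex
+        # is never run.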
+ if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). 
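+
+        For example, a hypothetical hook that turns specially marked
+        objects into complex numbers::
+
+            >>> def as_complex(d):
+            ...     if '__complex__' in d:
+            ...         return complex(d['real'], d['imag'])
+            ...     return d
+            >>> JSONDecoder(object_hook=as_complex).decode(
+            ...     '{"__complex__": true, "real": 1, "imag": 2}')
+            (1+2j)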
+ + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc GIT binary patch literal 11292 zcmcIq&2tmkcE2sjACi&37_iM}!weOf0h^grg(*VjJp*RO4@U6XFwFBak=p8(-B{|D zyIWwr$eTbG*{8C4o2;@-Ro*I9S){Vde~_xAD%-5H%x3cYo!i}#{XArtI09eY&vVZ` z=iKvk^}GKnmp=LJZ`T?s`IPbfbNogBXei|K(wGQ9BBIomM%g zsO}crSyW1_%%C!%?rxlw9p;pSt)|pl;4v>toH8a%Vx}{N$}}rnGRq2~eZplA;DT$P?H3UqU?swTMdF{uF3{A%N4Zo6Ljgj~FYVy)e2 zxoy{v?br)^GiaK>SbtJ|;5Gus4NKnl_*5(4wu(=oXxqDPEo#`l?}oa!L_p|>;?VQA z;|Vl}0Epz|(Km*_`<&WE0Xgwd=G0q59RM<#;7(q>%B$d(GWr-RC=>F<|0C)z@@hf% zT|U$|hqb<;j5x2n0)+sRz^%=kIsn|9>Km$$GN&e*c^sK2RAUq6?S~J)xHa@ad#H8m z762->rtLSq#v6BQIr7NLS5e?EYplYhPA{GUj9Rl5*l{BOldw-TgRY2tQ^u4rZQL^E zjRwzEz#k7Ld|F2Z^dz6;R)d&tag+0Ej+E8x%Bi3Pc7fP=<)4!pmtg+Tl6ef04SCh1 z;Bk{`M1d*+w#>$HxqZyN8$s9C)PguU?!L?y(l|jwzB@rA)N6UZ>j%lAmLHrj-6$>G zq7k&tqnglb!6+N0+=9{IG01F6hR|ytl^7%wnM=2d3=!+kmbMrmx%$r;+T(Zvcpu)w#cPpfUu%#VsTeE0br_&zScdU=tb0+O+ zwqv;=7)iKC@}UJxPn?0goo*NcuTM~mCQ+!|8KayZg}k3Z84>rJYq&nX7x!YKk~Mfo zt9CUa>XoJIKU=K)`s(K9=Kif)H#he`Yrb6UQi88SbD@;_rpWZB)R(1yS?c3b-;(+% zsW+uwlDaSTveZMV7o{FYybRsAdGm_~4??0+wTh_y9N6&&&Wlvx24V>=B_(KECBg!Q zdH^wzt2hrS+W+qyg-rwu(s@u+{bCB?f~ZjN6r#o%JqYVKq5le1X(r{!7;2OlVKOq_)Dg+L}Q~!zXDTobN zK?9~>5%&c+Ptby$Jvc=<1Csqy=`m6y_pTU5AHXMww6XVxvRiL5nYM6HRxkWdRRN+$ z@8Ys}7aG>Vf$vh2n2s90sky`LJ>5635^Tj)KDu zsJ0|4q_Et$8~T`Q#$EU$38fNToaEqg1TG(@=3pnGq(B=`6J|nFbO{ayct>mZ+y}aC z&$mh2lz${OTu~6hGC7g6H)Z--LIrq2hfii+1IJ_0yj^jR(wUo@_IFHZK=>*&dtwu^ z@$Pr{BpmXlIY<2~itqo8#rK%~G5QYA-qdTNF1WT0%*~Ds=Z^kHL^o)&>rvCgIf7eE zFU?c`j)GQzzO$-U#LUj28gs)3s}EP6KYmtQt=@m~V6FOmr&K 
z1z+ZDhe5X!$s8~8#5ZiX(Bs(0_vrr1&>Fok5iqfkkKCSakQAZY3B~xg5I^M6;rX#e zJ1Q&N_I?UTkO$D6P&|iMf#+AmPY_OOl(Z{=eHlh@0r?5fnKY?w>94j7GWD(LvU zf`JqC&lys8dJwn3>uA8N%WPOF2>M&B$I7GY#i#%V0z_SKGiK~6RC`}n{U;Yq&vQOGBg2ROmu*XPY} z`pP(hQ}36_oP~r=cd=E5v0k1IT^z!K)E*||-rs1*FZ|PRRe&{oo-uJ%#0BXsFd|8? z(ZTX5o>sd^;LyND(&Pl*hf&v_4EzLe1L>09y%cZF@&x-ocn^xnVG_SkQa=fzN$O`@ zd`5A_1{BC7K^(>MHW>s4U93jDDF;Cd)9I<#I2#}sEUCU<0mHVAfmBY-S4;! zF^nCg5RjednZDh2WyT#bYJ(YfV6IzUo6bN)8#5RQwq5}W0$nIjD(@RdR^Aw_tWcwi zZnv(pM(ZP5^jX+-EiSf}*hST{9tWK@x*kkISwy$OZqfQnx2)T!TMS(#e1x;)*8+G# zl5Nr$)pmklR~v~pqPuHvBI}R1Oza)40?llcDS6TORPwuSPmBf}q^^6y%ANL??%orj z+Fzj+?W4%6qA{AEfzB;M51+yJENUk8yc#pk7Z9H!Mm2M#T;7;L?~HK`T6zSn1$;2XoBhJrE{ zU{GwdGDRj*w1xs20}WMR(g^k;nVc$Dt)XC1g9L~P{pJwRs8_)p&_dJA0qhPKPRew?=PY1weHrc9v#MaEM7nz1GMM{ zI#pUX$kly$?A5j&Vb*&tHJt%{sTWy$%;Fms*&UZ#U7?s+RTfWJ{E~&mVsI&mkKDS* zdd88vEWSl?!ncrl9^S&0FtmIBfkt!>gYx;Eae)2 zA|KL(AfpE;T)ade+(ijr1WXfI~(x*y^F)w7aeNjt21Q&BLFgZH|5_TlFn} z`bj<_ZT~cweix~f?9(@^NLFV}seA~YBFbwqavxCucH?b_FDXuB*6YX;*XuV>Npr_+ zF+o$WXSwBi-LxZtfs}_AA~7F^Zqx~U#=4j&jKL(W3~!ikb`UBvGZS><$a5q&qi?Bfdh}-}lO{DA$yzLN^s0m-IUPq!zB6*w&i3CZ~ zAX~+ZPdZ|DBPPTGin<$iLKo!qd!PocSG_LOBA>KUh!R!jP;13IAWfREFe{QJAJS%v zOQs24?bwm&2N@i<*@WW?(jv>1sla+Yxw}9!*(jkdx2|UCy9RPKNF%g*X5gbpkH7;V zxhGjMGwQ;rj&KU323OYaudSJHf^hdnhJe-@;AztgK!#p92gIvgK`dL4{-Mqa^RCrT;95r4d< zR5!X&9JDh)j*FL0582P(B*v=ZE^RMSLO|M15XOS*P3!4>v(W;b%pFJ$F3{p3;&f~< zK|gr@c;aZ@WSAT#j#isM14l?Gv}E4PjAtT_k%{!IhZ_tz0nQmLSzUp8uVE92=5Yi8 zVL)>&6^@OU_+AGT4IvFUi%#rP9+{Vxo`tJ)*iez-Tj8!kzZWk`g%!TT9~Un0c|b_hT4S+2k+u;=sH8fo@9b0IDQJ%D`Bh< zK?38Un_zv;Vfw+k6NHnLXzl)!>iV;l>a%rOQLAmcF@pYftp+`xPEM&O0l4*okoZ#& z4M}Sb3qJ(nGg#UyUYCUULsK>B7z5~-Kz@)R34zFMHFb>7V_2IkUb3jOVBCMg5dkw3 zwHn`jYqcu~fg`?Ct4l@XTBjwq${PPQm?2>DfQD9#?4O|mlZUJ&U%6oZU{_I*hfYMY zZ)p<^#;kdD023>t2=uu92$O$m4k;^{)z=Lfr@^E zLcg%-EF1q}A(4zJ_ zD3Ow1@tl3o0Jt#FTir*LVN!MJU&gR6wBOXUy42y~XXqOQi89YZl#~R7j1L7a1WM+g zEXOVK?-%4WQBxz=CI5ZRkRxPFX4EB2{u-6ug#n+z#v~IVTz%nRM!XH?ArpbOZhXK` z$U?#@$y$ zJN9cRI|`HU23D-lpJzf>%;Ujj5?O_+Bu69@S*tMz3w2Aetkv3q(`~Uyajn%r@mfu?p*qik z)PjCmd-3%eemVAZ<^HPZ{5F2vwId>Kam&Feq;?WNBeN*P`Sd8G7e;eMW41CiA-^NE zc8M2fgswdQSv*b5XUf5E(w3OD#jgfM|Hss~Lt`~Ku&#sG$brph3_Xd7Wx)>>;RDL! zqmU$_5!~E_Uf-@-_nxe;ikbTcFQRLY*Q%@XVEEw4vz5n>g;Wn8uY9#`@kenMACP7E zX%NALOSS55qbk2~(yyg|%1Pu#BWQQLmQFdZKeO=AdjIR^)km`1y_Nfq)*r8|e{GTA znpbVszGBfrA?9?zwHhFZCDn$LdM7cYq~iYr|Dtigy{pCROHoL7(J_jbKm5y0> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. 
+ + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + + """ + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None): + """Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is false, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is true, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is true, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is true, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is true, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If specified, default is a function that gets called for objects + that can't otherwise be serialized. It should return a JSON encodable + version of the object or raise a ``TypeError``. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). 
+ + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. 
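+                # NaN is the only float value that compares unequal to
+                # itself, so the o != o test below detects it without
+                # depending on any particular bit-level representation.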
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
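+            # e.g. {True: 1} is emitted as {"true": 1} and {2: "x"} as
+            # {"2": "x"}.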
+ elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, (int, long)): + key = str(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b GIT binary patch literal 13938 zcmcgz%WoVu6UVGPCtt?yCBhuPSIZfHJC2EJw=^C=d zVfU!JMb#3`xaN zKxe3_sjlZ&Rp0mf9@YBQzmHU2|K^)Jb(MZjryf^SIHD|k&>9s%L0O-t?Mu>rKs^|IUi(4QUR%m9E3Z#| zjp-^Xee9RVq3m%$9*5PcQYCLt6}+;#Qt~Ru3^6l|%m_21$c!;Fj?6J;jw5q|nF(Z0 zsw)ph)aF}Ck1B7HW8Qf5ntCv%^oa7ul|H8Q7)!k=W*U~7{2EJo@^9+CKCZk|Lc|HB zC)9(J$~&!|VY4QccSbVkQc^AKj-FBvipqOQ=~EoSdzsZ=EA_>L(@LLF8Wk@q{fg4x zQy5jhsjS#LGY=BjivMeT=BqeFl}sVgQeInjV;v`vAFR~exbFL=!7v)EyKA~;T1D|G6g*tq zym)bLar3Rl;^Oq;@?v8#Sd12vNAVacs}E85X#3$W7VWv;dNdOcnGToOq0x907x28e z7<2kX^Dv$8VcI+_OUltJd}MK!^Hg@xc&W$?`$Rm$iAh2 z%uk(DaOx7i*YSacf#GCtmF18)Ll(Ezkd?^3 zw3kgeR~S3o?IWg$yLEs)r#TysQI*Pt{9L&y*IBcK#OyHG3I`d^C@U!_zbFTbO)sg( zWfh)K77w8>J%k;l0!tB}QidohbWi$~KhWU`9M+UqBwKK{_!^RoEdu-6!u30M=E9)4 z^+GHXbEtOGn7brP;S;~YEPsp)i4;b~aP}fExl1SEa``SY7(|flrfqyWZcBA@g!iCOODgBW zJ`VR)WJ=cjU5L~f%+T6O;rsyTzlqwIt5c;ntP@swJRy8{X^%SV_~Ds+h;O`zL_@(r zvXFvxpq{}-qUfRfb$|8`qBbc1P>W1SJu6e|gZZAJBUF38WT4v7azH)9I77-YycWZ1 zP&YRd9mM`}QyU4pvYI5V%NH&X$Ige*%2hiBGrJK+8610V;u4eFb>O#-&l_M~Xt!Em zwb)KpHCh8MH=y|JaQU&Wqtp#NJK1XK*k0bUJ>78I&E&G_`R3fg&;GJE1^l-&5~vB% z&$gjFd$hNu<(uyv{Om8w#`OGpl6RNPc1m_unvQvp%(>^$+4SRN)=t{c3F#o)wi`w6 zRyJft=L?L9EuXd9flod+!)&LjF&p&7>GT7T*$jh~S-a5;UF^fQ9kkb%b@YE9^Ip`> z7M%{VZG&67%3hq#TW&M%8UW*N+l`*G7ax<*k*(7Fa_!A#r|d<>6t=q;+L|H-USIVg 
z3L(szdbkq!UurvqU__Zb%-V5#x!!bRBN<{m(7d!=8E0wfd@3W=nWZIrUBeFY>)M(z0X~hZcz?g`sSiE56(GzjPteGLn%$5#Oby5M%ibyA{SF^RtGg4If+UCw9CV zwws<^cd-Q&;<(+?kgU10vBTbJZXG7NPerYf(X41A~2rVt6M^COMB^|rAxp90qn+SA<-0GS%n?8 zlU7?6veI@VH%PPwtB3Y7227%S_6+n5!-AcBI7I7QR|a48RdlZ>Fs^MK*Ikm(L;rpf zIy;%sumOvl^JXKOIeQmUU)A-sTHTN8?WP;`EG4iB(ebQJ^v4hNQ@tH0;d-j_yhB!F zW|UE}Zn8%{cBC6R!tp`Dd2PCUowH2Bd?>g$PesbX5AYlUoCgu29mVu2s~f964rwD5 z!!~5g4eYy5bp&kQ5CMaD@sRkrOFW_M&Vo72@d{;j0_QTe#p&qX4hVL*Q426;_n`*% zygNT@-!b%TnXQ=19>Mdn>U>&b5O}s5%bYm2dxW}Wz3M*ou_MTnS1C7k+|qR)4w69( zut2NngK1{-!Ilj()gg2^4epGv%z7AO1ox@CP>=jpV$+fKGE>ALrZM*-2%p)Fp8bHV zW$fYFpSjJpK__ymA>VA5XuSR63mc}E$#2g1fdFF4{c!%tG&nIg^0CwVcPPdXAQyHH z#`&qy1lScEXQSN|u8H&t3@17B1Q7yJ6o%vT)rjDmP0S#jGsb}DfNamrgdBS|_{Oz8 zcWW*Q=kUY5^7pOD4?J!yXmD{h!CatMa3W6u`Vin14Nwt;lG(&a$*IsL=wY@zRVC}1 z=0Uz}8$6vPTXQWiJ(n#9Nb2)J(&?M@nZYjC$ooQ&lrWZZ(d?mkda(X=l-PmZX!-%I z%j|qbU1JSYO~N3u1u)%uBL}@KpK_q~0Du}B<^tEjxJJ`q8(q6fOswUVvplrU<4iui z*mA<{IHve6=&&{wlv{!U;kVfupK2$Hk_Ey-=C^F=3aa0-f+>G#S(+7CE zzzP5a7^a1A!gH6IY9({pjdmc63W8uJfHLw=vx#FOc%?9%(qoT*o@WlwqzT+VCw`%wFUZfNwz(8gUhKS#gK$?t%2KJ%tZCcd3ik z`6}HxaZxj!D?Vtq0!v2Gq*oUubW3wQmM6l7Adk4SMns&G9Gu;IxilF;Q4ga^s41mW zsL57R6<43Ga;%#Tht8_=5|fviyuyS5PUlr5;(+I(5Ie82g2ADq4GRc;wHno5tp-Oo z4G)|zTb;iVYtkCQU!4XdzeRknvn^MEXq1WBATk5{Z3;iXgB(M)Y!|a-K`Ir6Da|wP{R_k#U&72DfE$O=#z)a62}Q$`&BrKK_y&^+Jz^-jVHKG5)fU; zTEVFj(F1sVZCQwSk_x0l3P6Y;Cj!Q;adaGFDJLnKxNuSVBWY0@{rxj6$?3TiFbKa- z!_#8O?*@8Z8jOkrOGikuO2BsHUV`9DWH1YHj@tAgj?id~yjpLse;ijP=ZRz-RYMQgAi$W0K1Xjgw-SRc!Hrj>nX!tA#)qR#G zd^R{$ev`=AB`RH-XTPeTwiD)oA+=AOI!sz!_E!u!hp3VDtvIZ&>E!%IgdSBHZ00aY z5)L7k@GF-!bI1@(1Y)rr*$tCi#Slr%4HLEtO$bnOnKv&cfQX-wH%gf)6X~GoR0|`2 zCA)xie#i#+9^u8`ASW@x5&aYLa)+uQ2PdpLv4iJID3^5-HLwEwBTIwjVbXw8gP#aH zf(L=sQVi}!@w6!ho~OkS5EKugSOQ?&3G7Mh47d>YTqQA;WP?&WNszk_+eJ#|<(dT{ z0I|w4w?8hUDh~q0+@ZU$xPiQ|tyd7>mtgTzy4K*SG(IH?H23l^+eUp7&^kW2!;KcU z7OWBjR3NQJUKL1#Vb~CJwgyL)d2UpKxaxOsMkWN1ME{H_!Zt!?Kl6Pt+`yW_~2_FXbZEOm0scYc6J*|c>xT7%=|{FwDuSxUsMb@jyK(Q&#% zA^kgi<2N{NrD9bI{n$aK61Ue{vF5PWT9xC#P66jU*#>GY)TE`sC8Yd$%fTx z>!H_f@{y~n)x5A?tC@}9mI$B7O@MPAk4A@bTCtX#cbFqN9X^rVQQYJ^G!$>)8&ie% zzua$`zp{1mQhB0$rq0(s_+>i+Sp2U@cJZ&Ec8hAaq;`kYZok?cP`iU_x2$$6YPV1A zj;P&HwR=qM9#^|3)b51ZJ*jpl)$Wwq#Vag$)x}c3tJLqI3#zQH(a<*^4YQa5F~x8; z=6Khz-D94jo5V3O@BctvPTSaX#=*NuMH%Om+B*d-p^(mtnM#50$Myr)KxRsU309vrkvHJ6IyDolA6_`4>9eD-f7c@VtHpxu;3obia3h2-Ep;t z7q0Twdswlt_@QlZiSjmvqFgvgi$`Kr2-uid3dOq#L!26*hS)t!^#nSL zA=WS%Xm8c&`LXXB5~>1%6x{5QEbL zsvoV4QaXX!&fg?VGMHtBKDa3+H)gRlTU4O1`tU2TMqbNd?KYU;P*@wz*qg8hArZSP zg|*=v*6>Q>eprJ{bzlu_56jKqzzZ;Ln3X8yk0m1>18mI?XAGRmn zxD5UR%QR)^g)(9RND;6>m*9^uBsv&KqiG8d3B^06ctVhl#qfPl+>l}}9H7Z!00@fj z=93hNyD3<4s>!aY@Kze&J_JHP`4<(u1nU>jftR}ky`WsCypb$Y2nZKUpAgHrAG4MS zHNYw%99ilrE(}XHi^V%%tOwq+3Nu*9r<9h3(!4ZXElUOZOj|%CSH)7|8b?Sc=w1}3 z!J@jRNgJ8%Wi__117&$~1V{FF`7dO)x}cl`<#-Tb*D| zq-mejQKa@uod}$WV@J3K(XN(W3jV*!)(uSjCzLG)AQ<*KR0O#(OJ%Q#&NWkyAav9A!X!83`|n^7BP$#5&1twW-nStC7(@AFvmX$9bO# zjdSM|61kNy!DxxuJf8)={q$wa3=3f7qjO*61g@A^*Hv07@jWE{)-WPu6L4RK7;)rp mVGM#ZW|bfaoxh>Ni0Ut#Ee>1#g>t!kdZaM<*Njmf_V?dj3=-%7 literal 0 HcmV?d00001 diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100644 index 00000000..adbc6ec9 --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,65 @@ +"""JSON token scanner +""" +import re +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = 
+
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
+
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
+
+    return _scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef
GIT binary patch
[2340 bytes of base85-encoded compiled bytecode omitted]
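`py_make_scanner` closes over the decoder's configuration and dispatches on the first character of each token, returning a `(value, end_index)` pair. A quick sketch of driving it by hand, with a `JSONDecoder` supplying the `parse_*` callbacks it expects (Python 2, names as in this package):

    import simplejson.decoder
    import simplejson.scanner

    decoder = simplejson.decoder.JSONDecoder()
    scan_once = simplejson.scanner.py_make_scanner(decoder)
    obj, end = scan_once('{"a": [1, 2]} trailing text', 0)
    print obj, end   # {u'a': [1, 2]} 13 -- scanning stops past the closing brace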
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
new file mode 100644
index 00000000..17c97963
--- /dev/null
+++ b/simplejson/tests/__init__.py
@@ -0,0 +1,23 @@
+import unittest
+import doctest
+
+def additional_tests():
+    import simplejson
+    import simplejson.encoder
+    import simplejson.decoder
+    suite = unittest.TestSuite()
+    for mod in (simplejson, simplejson.encoder, simplejson.decoder):
+        suite.addTest(doctest.DocTestSuite(mod))
+    suite.addTest(doctest.DocFileSuite('../../index.rst'))
+    return suite
+
+def main():
+    suite = additional_tests()
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
+
+if __name__ == '__main__':
+    import os
+    import sys
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+    main()
diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py
new file mode 100644
index 00000000..af6463d6
--- /dev/null
+++ b/simplejson/tests/test_check_circular.py
@@ -0,0 +1,30 @@
+from unittest import TestCase
+import simplejson as json
+
+def default_iterable(obj):
+    return list(obj)
+
+class TestCheckCircular(TestCase):
+    def test_circular_dict(self):
+        dct = {}
+        dct['a'] = dct
+        self.assertRaises(ValueError, json.dumps, dct)
+
+    def test_circular_list(self):
+        lst = []
+        lst.append(lst)
+        self.assertRaises(ValueError, json.dumps, lst)
+
+    def test_circular_composite(self):
+        dct2 = {}
+        dct2['a'] = []
+        dct2['a'].append(dct2)
+        self.assertRaises(ValueError, json.dumps, dct2)
+
+    def test_circular_default(self):
+        json.dumps([set()], default=default_iterable)
+        self.assertRaises(TypeError, json.dumps, [set()])
+
+    def test_circular_off_default(self):
+        json.dumps([set()], default=default_iterable, check_circular=False)
+        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
new file mode 100644
index 00000000..1cd701d4
--- /dev/null
+++ b/simplejson/tests/test_decode.py
@@ -0,0 +1,22 @@
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDecode(TestCase):
+    def test_decimal(self):
+        rval = json.loads('1.1', parse_float=decimal.Decimal)
+        self.assert_(isinstance(rval, decimal.Decimal))
+        self.assertEquals(rval, decimal.Decimal('1.1'))
+
+    def test_float(self):
+        rval = json.loads('1', parse_int=float)
+        self.assert_(isinstance(rval, float))
+        self.assertEquals(rval, 1.0)
+
+    def test_decoder_optimizations(self):
+        # Several optimizations were made that skip over calls to
+        # the whitespace regex, so this test is designed to try and
+        # exercise the uncommon cases. The array cases are already covered.
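+        # (the document below deliberately mixes padded and unpadded ':' and
+        # ',' separators)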
+ rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"}) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 00000000..139e42bf --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEquals( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 00000000..4de37cf4 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,21 @@ +from unittest import TestCase +from cStringIO import StringIO + +import simplejson as json + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEquals(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEquals(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEquals(json.dumps( + {True: False, False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEquals(json.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 00000000..7128495f --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 00000000..002eea08 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json 
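+
+# (Every document below comes from the json.org JSON_checker suite and is
+# expected to raise ValueError; SKIPS at the bottom lists the two cases this
+# implementation deliberately accepts.)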
+ +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase + +import simplejson as json +import textwrap + +class 
TestIndent(TestCase): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py new file mode 100644 index 00000000..c3d6302d --- /dev/null +++ b/simplejson/tests/test_pass1.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass1.json +JSON = r''' +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E666, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": null, + "array":[ ], + "object":{ }, + "address": "50 St. James Street", + "url": "http://www.JSON.org/", + "comment": "// /* */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ], + "compact": [1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066 + + +,"rosebud"] +''' + +class TestPass1(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) + try: + json.dumps(res, allow_nan=False) + except ValueError: + pass + else: + self.fail("23456789012E666 should be out of range") diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py new file mode 100644 index 00000000..de4ee00b --- /dev/null +++ b/simplejson/tests/test_pass2.py @@ -0,0 +1,14 @@ +from unittest import TestCase +import simplejson as json + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py new file mode 100644 index 00000000..f591aba9 --- /dev/null +++ b/simplejson/tests/test_pass3.py @@ -0,0 +1,20 @@ +from unittest import TestCase + +import simplejson as json + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." 
+ } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 00000000..97422a66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(x) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 00000000..b08dec71 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,111 @@ +import sys +import decimal +from unittest import TestCase + +import simplejson as json +import simplejson.decoder + +class TestScanString(TestCase): + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + self.assertEquals( + scanstring('"z\\ud834\\udd20x"', 1, None, True), + (u'z\U0001d120x', 16)) + + if sys.maxunicode == 65535: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEquals( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEquals( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEquals( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEquals( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEquals( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEquals( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEquals( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEquals( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra 
comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, 
'["\\u03b1\\u03a9"]')
+
+    def test_encoding5(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps(u, ensure_ascii=False)
+        self.assertEquals(j, u'"%s"' % (u,))
+
+    def test_encoding6(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps([u], ensure_ascii=False)
+        self.assertEquals(j, u'["%s"]' % (u,))
+
+    def test_big_unicode_encode(self):
+        u = u'\U0001d120'
+        self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
+        self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
+
+    def test_big_unicode_decode(self):
+        u = u'z\U0001d120x'
+        self.assertEquals(json.loads('"' + u + '"'), u)
+        self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
+
+    def test_unicode_decode(self):
+        for i in range(0, 0xd7ff):
+            u = unichr(i)
+            s = '"\\u%04x"' % (i,)
+            self.assertEquals(json.loads(s), u)
+
+    def test_default_encoding(self):
+        self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+            {'a': u'\xe9'})
+
+    def test_unicode_preservation(self):
+        self.assertEquals(type(json.loads(u'""')), unicode)
+        self.assertEquals(type(json.loads(u'"a"')), unicode)
+        self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
\ No newline at end of file
diff --git a/simplejson/tool.py b/simplejson/tool.py
new file mode 100644
index 00000000..90443317
--- /dev/null
+++ b/simplejson/tool.py
@@ -0,0 +1,37 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import simplejson
+
+def main():
+    if len(sys.argv) == 1:
+        infile = sys.stdin
+        outfile = sys.stdout
+    elif len(sys.argv) == 2:
+        infile = open(sys.argv[1], 'rb')
+        outfile = sys.stdout
+    elif len(sys.argv) == 3:
+        infile = open(sys.argv[1], 'rb')
+        outfile = open(sys.argv[2], 'wb')
+    else:
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+    try:
+        obj = simplejson.load(infile)
+    except ValueError, e:
+        raise SystemExit(e)
+    simplejson.dump(obj, outfile, sort_keys=True, indent=4)
+    outfile.write('\n')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif
new file mode 100644
index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c
GIT binary patch
[10819 bytes of base85-encoded GIF image data omitted]
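The unicode tests above pin down the encoder's two output modes: with the default `ensure_ascii=True`, `dumps` returns a plain `str` with every non-ASCII character escaped, while `ensure_ascii=False` returns a `unicode` object; supplementary-plane characters are emitted as UTF-16 surrogate pairs. Interactively, mirroring test_encoding3, test_encoding5 and test_big_unicode_encode:

    >>> import simplejson as json
    >>> json.dumps(u'\u03b1\u03a9')
    '"\\u03b1\\u03a9"'
    >>> json.dumps(u'\u03b1\u03a9', ensure_ascii=False)
    u'"\u03b1\u03a9"'
    >>> json.dumps(u'\U0001d120')
    '"\\ud834\\udd20"'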
diff --git a/static/favicon.ico b/static/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8
GIT binary patch
[21792 bytes of base85-encoded icon data omitted]
zPg{8{{N_;8#a>qIqIn*QbkEcCV9M(~C`~6cDM}uD$y1K#31CxSK5NjMUX93a#b#!s z5y3Z)Ki^u8b`gBOfgL)QW$wc_L$eWijMkMecq(W?^5v#-d3S;~iwPdI9ICO6)fz?e zAJmc?%J9^Atxc-V1jC*ejU4JQWbdFa!a6`1HrMD20N!zHOG>2@HKI`ljXKb%1OE#h zP$(4Wui%A3K>{cZfEK~w1*K4Y6i9d}zqTe3Q9czOlVrmE&@Kh-fR9uWK%oFA>UQfxr(&Fp|qd zNx3|X=J=Bnb@8cfUHgFhBz~tQLZJf@Iz9lQj6Mkc0=GILq(2NHm*WUIWpKGOlqVn4 zj!8!mnwWx+!)}CZ*MZ;K73i1(a3TPl>sxjC<{y@2QA5CESrXTqpwnbexM53Wm|-(z zph0V;xq!7{`9s|GB|DTllJAuIlATIjNg@E}^%{K!wUiX zO_hM%P^}#sl>#6@RX(|Za{>{062pt3H&YomZaf-6kn3y zD=|_olo-62CvK`RqrhuV+B%8ZNF|@$SSbX)SSBEcriKO3)aWHNHA+TP_MrgzD>$xX zDAzb1voM*@tiC5;p&#t}XHd5mml@hA8dQi6B7<{}zb9$k zhjaWgU&^Ds(gkcZ^kE&y0rZG+*~;~hd*|QdAIl#nN~6Efm->>r4}@tq;QdblyRk~h zZmr=nVRs1Bv?Is2HeIZb?yF}XCx7P{t|q0K$MM{S`=AEa0}WL7;1SnXmiG7o>p#q8 z&+#{)i6M(PS}Z=bY;gLE{A0egCG|7aIH5?Q<8U?qFc&A(GSswPoSwb>mfGmld;cHg zU)?U|A18q>pIP?|<{?-=wkUZ_@MVMy+i}lc8}l%rpaKJ+KLjvYu=ssE|9`;z^S1uf zCx4E$mw!o`r52u2t#7J6tg9f%_Jg^{G^`1KEe++1qyItvp`QfFQcls_T)Gx?C?3{= zo|8DPS)oP`j7ZwDFDqQpn-v7wutK>tD_jf+(SpVBj;uE;40IFep7-I_ud2D+`c>Eb z-%$Chz7wp|VFES|#_;MgA-l7dBKyu%^Dn|WfXm%k!Q3z%W91^Lp)yEnr1S&$zXt*E ze38TuYzLM7Q1kcm&y4|^8tN2&gA=0}tU8I+*4e&e< zaCx4%$1B_7&OZ)tW=YFsy zo@2X|B)d;g^Z#-U#(G2(gb*NjA1HW-0V9zl2-co2Nz}-{9{aNa@_)I0>G9dMU;}M} zy}+g#0lVr6Nw(?7@vpJtK3@Ksd3VGs`@cT>`6Kw(*u*OPfh5U}BRJWE9nFiag+0y* z2-Y&@I9Bx$S0L4(Ab&5Qu4=8uG+_Vo-G|GcR?XFTO?qXoY3#Pwb4;1D^%#1(%6=2D zYs!VZ1hAneZNg7MUH;PA3;l$D2z{XZKgNCrx_F;}h1$Xr@1X^-9|jv5_F)^IkVH2J z{7l0?2!g`8{4v1ege3?Gi~}C%|IYWz-!+btH;ebMU^-#hV*61ATn6C%Er!O%=IgNZ z{7h_na^p5m$4eLHoV>hh~w1fl;4 z-fQtyd#?zEkllo3|0hXw8o||h#XrbVSD&Q18hjkOG$E;OP_MHCU%E%bzxMeb%OCFz z1rTG2=WHxPo>&dI7U&R!PBW4)?M{=nU-D?5OqDN%-3WW%h)N+ZPDxV^Uvs>_w3$^M zZb!4@W{3^Wd43$u;yTmC2B#N@o0Q@8*GFPd{VxAlcALR}5|YPI6C?0Go}ng3!QOl; z_DQG#W%unY?Ue%i!yMpOHOC8h>tW6WpAg>n(r()k;zQpx`*zjXQ}N%G5qJbijcWp= z##I2ESAQr1;awl8;fwhaqZbP#1{Jf!O`m}O!4mvelf>Y&;DqByXj)&Z!g;Lf) zENRk0xwJ`3Wq6Zj%Ebm|%8o*|5B47#sQiZsNP_8 zaNdtXzV&35<*EFXEGwX){En{^dSe3)*T{wfoB@#?5ZVNYXp7JWpw|L|eg`5n_7LD8 zLXMDQsIIl-4#^1F??q@JjPovwVSTa(I-oDAgk`Q?H@G=leP5M@fd4#57Sdn@L)}-9 zAxuvuUnY{v)-`(z&EZ-L)|$#?EA+6>Tqg4Y_yIz&|C|Eqnzp z8s*Zc1An6qyuOw-mK!DTu@V5g3z-4z0EDUpg*sB>I#;K?^RLfkYJ2H@@hzH|8XOM; z@2cz|ZSgeq#?1Un-yPeE*lv8UJnM=fdHdd~&`{IJuq8cLhlV!C&qC!J+lI=wb`6zp z>GCF`I?3hh;hg}mxBbE1*4Wx$!>i+J-sY)}q4CZw2v(S5hfuaLLTW#;T|C61WC4DI z7?jsA=>$TflMu364SRXDPmqhN&=7sQ{kmc}-o7_PthXwpNps~$ss2X%Oeze!q$}C3 z+TY=rl2_OT_YMH|Dy5<&xdOiBSO`mbV~_WC`x?IL%lojxCEjojF@Pt6enHP@A2|Pt zsAj0oH&SGu>95dl7ERiR@tD91LSDR*&nzz{2!qzI`-K|PzAYHMK z!S|{pi1x6rd^eHAC$>EBQQ*Bj_!_=_{f)n~eUj)gk@JtK{AX&v8Fvm&5?viIkKl8t zB#_uh!^7X(zQt!6x^L#3Pl(4X&IMm1oNx4=qIs^ZeF8e74R8*)tS_p<7q~q2DB&clOi<@JYrtqR*8M9WWc`+R17WqtD3Jkyr9M-?E zy5tY*&!D>M5JYPmSOd>ve=_*N(_Vk)ZTZ8Yi*O)MyrDig3w3|{N@Dl|*56f_7LQ&8 z=cf_X5K9?Bk=9dR0k6MfmhzZ@^W6Ho`VW$5hJSW~zo{y{-M%i%Gg@ubN?9M4<9Jr^ zOWRBJw{Z4H=tW)ecBRhSP{p(ESKA-UuM7CLE-XvD&;Ag1LnyI6SRk~k2SPtc5mK*X zgA*VQIQv~-n`{jDj(e?zDQ$?l%PM_Pm*3GpxlBeZlgo8J1WROcK4*VMnumtQ^aS(> z4UK*U`Bn?L{F_!>ytJRn-z$^JU)r9y?%VSJhps=^{>J+Wb<95AW7G?H7yEV9`;RKv zf7FHgWA^`{{{CBX{ePw%&3O3#HOGDu_|I%qzsYO#le~YQu5H}br#}=Tc$N%*K|fZq znol4hN%V4p*uyO}^%=z1k6+7B<5n}&XR9C<-=BcERzh_ijwxzlZM0HXt!E4(bex2e z%sJwwB{~Gsr>JS$MEIPRje|4t1f`I-9f0%i;kE{AT+Dx(WL*QSRqNc>bS+f!m&0slvGhrKsM`N2}6T~}Wy9Xy6L!dnYeepzr zBz1<^_yO<_^xdN|eXxz-=QR}6#jyuCW{D5+2YA0^K2*ql4}EZo;;6pt1GdRD`$HTX zgv%rg^2huG{fC@<++#TUX1%!jn(xTjg9MToK}^669PoU2ejd%jrznTN>e zmM-Wg3Gx@I^nHUEH?}nGyc}Pk z{~GaFpG zV|(!U>M~l$?uPN@7LTtrI>nXW{rj5!KjP$r=Lfu>G41m9+XT_3ACHk2aQ+@kC+<1T z7ZIe%1c6TKIDt;`c!5p|;IJAg69l?P=8E-8K|gPD`subs1pL#KTO3U~EK`AUTmio^ 
Date: Tue, 23 Nov 2010 12:49:57 -0600
Subject: [PATCH 083/482] Change a couple of the example story URLs.

---
 index.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/index.html b/index.html
index f5736129..4ee35c4f 100644
--- a/index.html
+++ b/index.html
@@ -98,7 +98,7 @@
    fanfiction.net
    Use the URL of any story chapter, with or without story title such as
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or -
    http://www.fanfiction.net/s/5192986/5/. +
    http://www.fanfiction.net/s/5192986/5/.
    fictionpress.com
    Use the URL of any story chapter, such as
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or @@ -117,7 +117,7 @@
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332.
    mediaminer.org
    Use the URL of the story's chapter list, such as -
    http://www.mediaminer.org/fanfic/view_st.php/166653. +
    http://www.mediaminer.org/fanfic/view_st.php/166653. Or the story URL for one-shots, such as
    http://www.mediaminer.org/fanfic/view_st.php/167618. From 708f750bab20ac33885752321e5374cd821fc546 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Wed, 24 Nov 2010 09:37:49 +0000 Subject: [PATCH 084/482] Added Mobipocket support to the Web UI --- index.html | 1 + main.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/index.html b/index.html index 4ee35c4f..ac1c0426 100644 --- a/index.html +++ b/index.html @@ -51,6 +51,7 @@ EPub HTML Plain Text + Mobi (Kindle) diff --git a/main.py b/main.py index 9a9cbf31..e124982b 100644 --- a/main.py +++ b/main.py @@ -218,6 +218,8 @@ class FanfictionDownloader(webapp.RequestHandler): writerClass = output.EPubFanficWriter elif format == 'html': writerClass = output.HTMLWriter + elif format == 'mobi': + writerClass = output.MobiWriter else: writerClass = output.TextWriter @@ -292,7 +294,9 @@ class FanfictionDownloader(webapp.RequestHandler): elif format == 'text': self.response.headers['Content-Type'] = 'application/zip' self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.txt.zip' - + elif format == 'mobi': + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' self.response.out.write(data) def toPercentDecimal(match): From ccef466090d6050681b2efea40d05e4904091b5a Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Wed, 24 Nov 2010 12:36:38 -0600 Subject: [PATCH 085/482] Change no-class div tags to p tags in mediaminer stories to get paragraph breaks. --- fanficdownloader/mediaminer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py index 6a4c03a7..daa48fa6 100644 --- a/fanficdownloader/mediaminer.py +++ b/fanficdownloader/mediaminer.py @@ -346,6 +346,12 @@ class MediaMiner(FanfictionSiteAdapter): soup = bs.BeautifulSoup(data) except: raise FailedToDownload("Error downloading Chapter: %s! Problem decoding page!" % url) + + # convert div's to p's. mediaminer uses div with a + # margin for paragraphs. + divlist = soup.findAll('div', {'class' : None}) + for tag in divlist: + tag.name='p'; nvs = bs.NavigableString('') sst='' From 719aed32a98e1c23f7ea56aad36446f646fc4164 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Fri, 26 Nov 2010 15:55:04 +0000 Subject: [PATCH 086/482] Tiny static changes - preparing for doing some SEO --- index.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index.html b/index.html index ac1c0426..c084a399 100644 --- a/index.html +++ b/index.html @@ -2,7 +2,7 @@ - Fanfiction Downloader — twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org to epub and HTML to Stanza, Kindle, Nook, Sony Reader + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza @@ -35,6 +35,7 @@

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier.

    +

    For Amazon Kindle use Mobi output, for Sony Reader, Nook and iPad use ePub

    To support new features, such as including story summaries, the URL you need to use for some sites has changed. See below for example URLs for each site.

    Or see your personal list of previously downloaded fanfics.

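Patch 085 above fixes mediaminer paragraphs by renaming tags in place: BeautifulSoup lets you assign to `tag.name` directly and serializes the modified tree accordingly. A standalone sketch of the same trick (hypothetical input markup; assuming the bundled module is importable as `BeautifulSoup`):

    import BeautifulSoup as bs

    soup = bs.BeautifulSoup('<div style="margin:1em">one</div><div class="x">two</div>')
    # divs without a class attribute are mediaminer's paragraphs
    for tag in soup.findAll('div', {'class': None}):
        tag.name = 'p'
    print soup   # <p style="margin:1em">one</p><div class="x">two</div>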
    From 2d7f6c2e5c64d9f354d676fdaf2b78bf5a4bfcb5 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Fri, 26 Nov 2010 19:36:53 -0600 Subject: [PATCH 087/482] Correct story status for ficwad, change story status for fictionalley to 'Unknown' (site doesn't tell us status), change mobi file output mode to 'wb'(binary) so it works on windows. --- fanficdownloader/fictionalley.py | 2 +- fanficdownloader/ficwad.py | 34 +++++++++++--------------------- fanficdownloader/output.py | 2 +- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/fanficdownloader/fictionalley.py b/fanficdownloader/fictionalley.py index 68cd36e4..b1a32125 100644 --- a/fanficdownloader/fictionalley.py +++ b/fanficdownloader/fictionalley.py @@ -69,7 +69,7 @@ class FictionAlley(FanfictionSiteAdapter): self.numWords = 0 self.genre = '' self.category = 'Harry Potter' - self.storyStatus = 'In-Progress' + self.storyStatus = 'Unknown' # fictionalley doesn't give us in-progress/completed anywhere. self.storyRating = 'K' self.storyUserRating = '0' self.storyCharacters = [] diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index 058528bc..9cb353ce 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -111,26 +111,20 @@ class FicWad(FanfictionSiteAdapter): meta = soup.find('p', {'class' : 'meta'}) if meta is not None: - s = unicode(meta).replace('\n',' ').replace('\t','').split(' - ') - #logging.debug('meta.s=%s' % s) + logging.debug('meta.s pre=%s' % meta.__str__('utf8')) + s = re.sub('<[^>]+>','',unicode(meta)).replace('\n',' ').replace('\t','').split(' - ') + #logging.debug('meta.s post=%s' % s) for ss in s: s1 = ss.replace(' ','').split(':') - #logging.debug('meta.s.s1=%s' % s1) + #logging.debug('ss=%s' % ss) if len(s1) > 1: - s2 = re.split ('<[^>]+>', s1[0]) - #logging.debug('meta.s.s1.s2=%s' % s2) - if len(s2) > 1: - s1[0] = s2[1] skey = s1[0].strip() #logging.debug('Checking = %s' % skey) if skey == 'Category': - soup1 = bs.BeautifulStoneSoup(s1[1]) - allAs = soup1.findAll('a') - for a in allAs: - if self.category == 'Category': - self.category = unicode(a.string) - logging.debug('self.category=%s' % self.category) - self.addSubject(self.category) + # ficwad doesn't allow multiple categories. 
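+                        # (s1 comes from removing all spaces in this ' - '
+                        # segment and splitting it on ':' above)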
+                        self.category = unicode(s1[1])
+                        logging.debug('self.category=%s' % self.category)
+                        self.addSubject(self.category)
                         logging.debug('self.subjects=%s' % self.subjects)
                     elif skey == 'Rating':
                         self.storyRating = s1[1]
@@ -159,14 +153,10 @@ class FicWad(FanfictionSiteAdapter):
                         self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(s1[1].strip(' '), "%Y/%m/%d")))
                         logging.debug('self.storyUpdated=%s' % self.storyUpdated)
                 else:
-                    s3 = re.split ('<[^>]+>', s1[0])
-                    #logging.debug('meta.s.s1.s3=%s' % s3)
-                    if len(s3) > 1:
-                        s1[0] = s3[0]
-                    s4 = s1[0].split('w')
-                    #logging.debug('meta.s.s1.s4=%s' % s4)
-                    if len(s4) > 1 and s4[1] == 'ords':
-                        self.numWords = s4[0]
+                    if ss == 'Complete' :
+                        self.storyStatus = 'Completed'
+                    elif ss.endswith('words'):
+                        self.numWords=ss.replace('words','').replace(' ','')
                     logging.debug('self.numWords=%s' % self.numWords)

diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py
index 10fb6198..25487d6f 100644
--- a/fanficdownloader/output.py
+++ b/fanficdownloader/output.py
@@ -109,7 +109,7 @@ class MobiWriter(FanficWriter):
         if self.inmemory:
             self.output = StringIO.StringIO()
         else:
-            self.output = open(self.fileName, 'w')
+            self.output = open(self.fileName, 'wb')

         self.xhtmlTemplate = string.Template(html_constants.XHTML_START)
         self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START)

From 617dc21eb99d44a284cb1d70e89911660a21903b Mon Sep 17 00:00:00 2001
From: retiefjimm
Date: Sat, 27 Nov 2010 17:32:31 -0600
Subject: [PATCH 088/482] Kludge fix for unicode in story descs in appengine.

---
 fanficdownloader/output.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py
index 25487d6f..b5a1ff8a 100644
--- a/fanficdownloader/output.py
+++ b/fanficdownloader/output.py
@@ -341,9 +341,13 @@ class EPubFanficWriter(FanficWriter):

         description = self.adapter.getStoryDescription()
         if hasattr(description, "text"):
-            description = unicode(description.text)
-        else:
+            description = description.text
+        prevalue=description
+        try:
             description = unicode(description)
+        except:
+            description=prevalue
+
         if description is not None and len(description) > 0:
             description = description.replace ('\\\'', '\'').replace('\\\"', '\"')
             description = removeEntities(description)

From cfb218f21e9eea22554d2c6701f208e282520890 Mon Sep 17 00:00:00 2001
From: wsuetholz
Date: Mon, 29 Nov 2010 18:45:47 -0600
Subject: [PATCH 089/482] Moved the check for a completed story higher in the
 code, this makes the continue's that are in that portion of the if statement
 not skip that check.
--- fanficdownloader/ffnet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index 4caa13fa..d156f9fa 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -186,6 +186,10 @@ class FFNet(FanfictionSiteAdapter): (u1, u2, self.authorId, u3) = s2.a['href'].split('/') logging.debug('self.authorId=%s, self.authorName=%s' % (self.authorId, self.authorName)) elif l.find("Rated: 0: continue From 30455bfda2207f5681c3f1fb3b7e39249246dd3d Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 12:13:00 -0600 Subject: [PATCH 090/482] Updated the processing for fictionpress.com of the information line to correctly extract the Updated and Published dates, and the Completion status --- fanficdownloader/fpcom.py | 66 ++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index 471c0b85..fcf4a7ac 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -136,6 +136,38 @@ class FPCom(FanfictionSiteAdapter): self.addSubject(subj) return True + def _processInfoLine(self, line): + have_lang = False + words = line.split(' - ') + if words is not None: + for word in words: + if word.find(': ') != -1: + sds = word.split(': ') + if sds is not None and len(sds) > 1: + if sds[0] == 'Updated': + self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) + logging.debug('self.storyUpdated=%s' % self.storyUpdated) + elif sds[0] == 'Published': + self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) + logging.debug('self.storyPublished=%s' % self.storyPublished) + elif sds[0] == 'Reviews': + reviews = sds[1] + logging.debug('reviews=%s' % reviews) + elif word.find('Complete') != -1: + self.storyStatus = 'Completed' + logging.debug('self.storyStatus=%s' % self.storyStatus) + elif not have_lang: + have_lang = True + language = word + logging.debug('language=%s' % language) + else: + self.category = word + logging.debug('self.category=%s' % self.category) + sgs = self.category.split('/') + for sg in sgs: + self.addSubject(sg) + logging.debug('self.subjects=%s' % self.subjects) + def extractIndividualUrls(self): data = '' try: @@ -250,39 +282,9 @@ class FPCom(FanfictionSiteAdapter): self.storyRating = ss[1] logging.debug('self.storyRating=%s' % self.storyRating) if ll > 3: - ss = tdas[3].split(' - ') - if ss is not None: - lls = len(ss) - if lls > 1: - language = ss[1] - logging.debug('language=%s' % language) - if lls > 2: - self.category = ss[2] - logging.debug('self.category=%s' % self.category) - sgs = self.category.split('/') - for sg in sgs: - self.addSubject(sg) - logging.debug('self.subjects=%s' % self.subjects) - if lls > 3 and ss[3].strip() == 'Reviews:' and ll > 4: - reviews = tdas[4] - logging.debug('reviews=%s' % reviews) + self._processInfoLine (tdas[3]) if ll > 5: - ss = tdas[5].split(' - ') - if ss is not None: - lls = len(ss) - if lls > 1: - sds = ss[1].split(': ') - if sds is not None and len(sds) > 1 and sds[0] == 'Published': - self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) - logging.debug('self.storyPublished=%s' % self.storyPublished) - lls = len(ss) - if lls > 2: - sds = ss[2].split(': ') - if sds is not None and len(sds) > 1 and sds[0] == 'Updated': - self.storyUpdated = 
datetime.datetime.fromtimestamp(time.mktime(time.strptime(sds[1].strip(' '), "%m-%d-%y"))) - logging.debug('self.storyUpdated=%s' % self.storyUpdated) - - + self._processInfoLine (tdas[5]) self.authorURL = 'http://' + self.host + '/u/' + self.authorId From 2f0bb31e2487966af862566477d83ad68837e954 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 13:56:18 -0600 Subject: [PATCH 091/482] Take out adding the id: field as a subject. --- fanficdownloader/fpcom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index fcf4a7ac..ad89b37e 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -141,7 +141,7 @@ class FPCom(FanfictionSiteAdapter): words = line.split(' - ') if words is not None: for word in words: - if word.find(': ') != -1: + if word.find(':') != -1: sds = word.split(': ') if sds is not None and len(sds) > 1: if sds[0] == 'Updated': From f114a2438eee22034b89712ff3d088bb63445df8 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 14:06:12 -0600 Subject: [PATCH 092/482] This one might be controversial.. Since none of the EBook readers are really for update-able stories I added tags/subjects in for Last Update Year/Month and Last Update, which at least in FBReader I can order by, and see all stories updated for a certain month, or a certain date. This will let you know to reread the story. --- fanficdownloader/output.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index b5a1ff8a..98c93827 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -337,6 +337,9 @@ class EPubFanficWriter(FanficWriter): createda = self.adapter.getStoryCreated().strftime("%Y-%m-%d %H:%M:%S") created = self.adapter.getStoryCreated().strftime("%Y-%m-%d") updated = self.adapter.getStoryUpdated().strftime("%Y-%m-%d") + updateyy = self.adapter.getStoryUpdated().strftime("%Y") + updatemm = self.adapter.getStoryUpdated().strftime("%m") + updatedd = self.adapter.getStoryUpdated().strftime("%d") calibre = self.adapter.getStoryUpdated().strftime("%Y-%m-%dT%H:%M:%S") description = self.adapter.getStoryDescription() @@ -387,6 +390,12 @@ class EPubFanficWriter(FanficWriter): i = i + 1 if (i <= 0): self._writeFile(opfFilePath, CONTENT_SUBJECT % "FanFiction") + + subj = "Last Update Year/Month: " + updateyy + "/" + updatemm + self._writeFile(opfFilePath, CONTENT_SUBJECT % subj) + + subj = "Last Update: " + updateyy + "/" + updatemm + "/" + updatedd + self._writeFile(opfFilePath, CONTENT_SUBJECT % subj) self._writeFile(opfFilePath, CONTENT_END_METADATA % (self.adapter.getPublisher(), self.adapter.getUUID(), self.adapter.getStoryURL(), self.adapter.getStoryURL(), self.adapter.getStoryUserRating())) # print >> opf, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName) From c6b79cb57bc42ec3a60e40fc8f24179d6592d482 Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 15:11:20 -0600 Subject: [PATCH 093/482] Changed the "Title Page" reference in the TOC and CONTENT epub files to be title_page instead. 
--- fanficdownloader/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 98c93827..da7503e1 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -406,7 +406,7 @@ class EPubFanficWriter(FanficWriter): t = "Title Page" f = "title_page.xhtml" - chapterId = "Title Page" + chapterId = "title_page" self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, t, f)) self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f)) From 4c99fdbcc11cd71cbc3f07bc1ada101a8b11b3bb Mon Sep 17 00:00:00 2001 From: wsuetholz Date: Tue, 30 Nov 2010 15:17:00 -0600 Subject: [PATCH 094/482] Removed some leftover functions that were left from using ffnet.py as a base for fpcom.py and mediaminer.py. --- app.yaml | 25 + cron.yaml | 4 + css/index.css | 71 + delete_fic.py | 59 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapter.py | 229 + fanficdownloader/books/place holder.txt | 0 fanficdownloader/constants.py | 542 ++ fanficdownloader/downloader.py | 207 + fanficdownloader/ffnet.py | 358 ++ fanficdownloader/fictionalley.py | 301 ++ fanficdownloader/ficwad.py | 257 + fanficdownloader/fpcom.py | 301 ++ fanficdownloader/hpfiction.py | 280 ++ fanficdownloader/html.py | 121 + fanficdownloader/html2text.py | 452 ++ fanficdownloader/html_constants.py | 19 + fanficdownloader/mediaminer.py | 366 ++ fanficdownloader/mobi.py | 344 ++ fanficdownloader/output.py | 500 ++ fanficdownloader/potionsNsnitches.py | 367 ++ fanficdownloader/readme.txt | 10 + fanficdownloader/twilighted.py | 316 ++ fanficdownloader/twipassword.py | 4 + fanficdownloader/zipdir.py | 177 + ffstorage.py | 21 + index-ajax.html | 109 + index.html | 206 + index.yaml | 22 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ main.py | 320 ++ queue.yaml | 5 + recent.html | 69 + simplejson/__init__.py | 318 ++ simplejson/__init__.pyc | Bin 0 -> 12071 bytes simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/decoder.pyc | Bin 0 -> 11292 bytes simplejson/encoder.py | 440 ++ simplejson/encoder.pyc | Bin 0 -> 13938 bytes simplejson/scanner.py | 65 + simplejson/scanner.pyc | Bin 0 -> 2340 bytes simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes utils/remover.py | 53 + 64 files changed, 16834 insertions(+) create mode 100644 app.yaml create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 delete_fic.py create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapter.py create mode 100644 fanficdownloader/books/place holder.txt create mode 100644 fanficdownloader/constants.py create mode 100644 fanficdownloader/downloader.py create mode 100644 fanficdownloader/ffnet.py 
create mode 100644 fanficdownloader/fictionalley.py create mode 100644 fanficdownloader/ficwad.py create mode 100644 fanficdownloader/fpcom.py create mode 100644 fanficdownloader/hpfiction.py create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/html2text.py create mode 100644 fanficdownloader/html_constants.py create mode 100644 fanficdownloader/mediaminer.py create mode 100644 fanficdownloader/mobi.py create mode 100644 fanficdownloader/output.py create mode 100644 fanficdownloader/potionsNsnitches.py create mode 100644 fanficdownloader/readme.txt create mode 100644 fanficdownloader/twilighted.py create mode 100644 fanficdownloader/twipassword.py create mode 100644 fanficdownloader/zipdir.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 main.py create mode 100644 queue.yaml create mode 100644 recent.html create mode 100644 simplejson/__init__.py create mode 100644 simplejson/__init__.pyc create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/decoder.pyc create mode 100644 simplejson/encoder.py create mode 100644 simplejson/encoder.pyc create mode 100644 simplejson/scanner.py create mode 100644 simplejson/scanner.pyc create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 utils/remover.py diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..8709ef3a --- /dev/null +++ b/app.yaml @@ -0,0 +1,25 @@ +application: fanfictionloader +version: 2-6-beta +runtime: python +api_version: 1 + +handlers: +- url: /r3m0v3r + script: utils/remover.py + login: admin + +- url: /r3m0v3r + script: main.py + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /.* + script: main.py diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..1d9c70a0 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 3 hours \ No newline at end of file diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..f4aec452 --- /dev/null +++ b/css/index.css @@ -0,0 +1,71 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 43%; + margin-left: 23%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ + margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, 
#typebox:hover, #helpbox:hover, #yourfile:hover
+{
+    border: thin solid #fffeff;
+}
+
+h1
+{
+    text-decoration: none;
+}
+
+#logpasswordtable
+{
+    padding: 1em;
+}
+
+#logpassword, #logpasswordtable {
+    display: none;
+}
+
+#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile
+{
+    margin: 1em;
+    padding: 1em;
+    border: thin dotted #fffeff;
+}
+
+div.field
+{
+    margin-bottom: 0.5em;
+}
+
+#submitbtn
+{
+    padding: 1em;
+}
+
+#typelabel
+{
+}
+
+#typeoptions
+{
+    margin-top: 0.5em;
+}
+
+#error
+{
+    font-size: small;
+    color: #f00;
+}
\ No newline at end of file
diff --git a/delete_fic.py b/delete_fic.py
new file mode 100644
index 00000000..73722724
--- /dev/null
+++ b/delete_fic.py
@@ -0,0 +1,59 @@
+import os
+import cgi
+import sys
+import logging
+import traceback
+import StringIO
+
+from google.appengine.api import users
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp import util
+from google.appengine.ext.webapp import template
+
+from fanficdownloader.downloader import *
+from fanficdownloader.ffnet import *
+from fanficdownloader.output import *
+
+from google.appengine.ext import db
+
+from fanficdownloader.zipdir import *
+
+from ffstorage import *
+
+def create_mac(user, fic_id, fic_url):
+    # Cheap tamper-check token derived from the user, fic key and URL.
+    return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url)))
+
+def check_mac(user, fic_id, fic_url, mac):
+    return (create_mac(user, fic_id, fic_url) == mac)
+
+def create_mac_for_fic(user, fic_id):
+    key = db.Key(fic_id)
+    fanfic = db.get(key)
+    if fanfic.user != user:
+        return None
+    else:
+        return create_mac(user, key, fanfic.url)
+
+class DeleteFicHandler(webapp.RequestHandler):
+    def get(self):
+        user = users.get_current_user()
+        if not user:
+            self.redirect('/login')
+            return
+
+        fic_id = self.request.get('fic_id')
+        fic_mac = self.request.get('key_id')
+
+        actual_mac = create_mac_for_fic(user, fic_id)
+        if actual_mac != fic_mac:
+            self.response.out.write("Ooops")
+        else:
+            key = db.Key(fic_id)
+            fanfic = db.get(key)
+            fanfic.delete()
+            self.redirect('/recent')
+            return
+
+        fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user)
+        template_values = dict(fics = fics, nickname = user.nickname())
+        path = os.path.join(os.path.dirname(__file__), 'recent.html')
+        self.response.out.write(template.render(path, template_values))
\ No newline at end of file
diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py
new file mode 100644
index 00000000..31ff0e5f
--- /dev/null
+++ b/fanficdownloader/BeautifulSoup.py
@@ -0,0 +1,2014 @@
+# -*- coding: utf-8 -*-
+
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses a (possibly invalid) XML or HTML document into a
+tree representation. It provides methods and Pythonic idioms that make
+it easy to navigate, search, and modify the tree.
+
+A well-formed XML/HTML document yields a well-formed data
+structure. An ill-formed XML/HTML document yields a correspondingly
+ill-formed data structure. If your document is only locally
+well-formed, you can use this library to find and process the
+well-formed part of it.
+
+Beautiful Soup works with Python 2.2 and up. It has no external
+dependencies, but you'll have more success at converting data to UTF-8
+if you also install these three packages:
+
+* chardet, for auto-detecting character encodings
+  http://chardet.feedparser.org/
+* cjkcodecs and iconv_codec, which add more encodings to the ones supported
+  by stock Python.
+ http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.0.8.1" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. 
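As orientation before the class definitions, here is a minimal usage sketch of the two parser classes described in the docstring above (the sample markup and variable names are illustrative only; in this tree the module lives at fanficdownloader/BeautifulSoup.py):

    from fanficdownloader.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup

    # BeautifulSoup repairs common HTML errors, e.g. the unclosed <b> here.
    soup = BeautifulSoup('<html><body><b>Chapter 1</body></html>')
    print soup.find('b').string                # u'Chapter 1'

    # XML or XML-like markup goes through BeautifulStoneSoup instead.
    stone = BeautifulStoneSoup('<story><title>Example</title></story>')
    print stone.story.title.string             # u'Example'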
+ +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear 
before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. + r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. 
+ """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
+                        val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing:
+            close = ' /'
+        else:
+            closeTag = '</%s>' % encodedName
+
+        indentTag, indentContents = 0, 0
+        if prettyPrint:
+            indentTag = indentLevel
+            space = (' ' * (indentTag-1))
+            indentContents = indentTag + 1
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if prettyPrint:
+                s.append(space)
+            s.append('<%s%s%s>' % (encodedName, attributeString, close))
+            if prettyPrint:
+                s.append("\n")
+            s.append(contents)
+            if prettyPrint and contents and contents[-1] != "\n":
+                s.append("\n")
+            if prettyPrint and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if prettyPrint and closeTag and self.nextSibling:
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def decompose(self):
+        """Recursively destroys the contents of this tree."""
+        self.extract()
+        if len(self.contents) == 0:
+            return
+        current = self.contents[0]
+        while current is not None:
+            next = current.next
+            if isinstance(current, Tag):
+                del current.contents[:]
+            current.parent = None
+            current.previous = None
+            current.previousSibling = None
+            current.next = None
+            current.nextSibling = None
+            current = next
+
+    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return self.__str__(encoding, True)
+
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Renders the contents of this tag as a string in the given
+        encoding. If encoding is None, returns a Unicode string."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.__str__(encoding)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
+            if text and prettyPrint:
+                text = text.strip()
+            if text:
+                if prettyPrint:
+                    s.append(" " * (indentLevel-1))
+                s.append(text)
+                if prettyPrint:
+                    s.append("\n")
+        return ''.join(s)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this Tag matching the given
+        criteria."""
+        r = None
+        l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findChild = find
+
+    def findAll(self, name=None, attrs={}, recursive=True, text=None,
+                limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria. You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'.
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst is True:
+            result = markup is not None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isinstance(markup, basestring):
+                markup = unicode(markup)
+            #Now we know that chunk is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif hasattr(matchAgainst, '__iter__'): # list-like
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isinstance(markup, basestring):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source):
+        list.__init__([])
+        self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+    """Turns a list of maps, lists, or scalars into a single map.
+    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            #It's a map. Merge it.
+            for k,v in portion.items():
+                built[k] = v
+        elif hasattr(portion, '__iter__'): # is a list
+            #It's a list. Map each item to the default.
+            for k in portion:
+                built[k] = default
+        else:
+            #It's a scalar. Map it to the default.
+            built[portion] = default
+    return built
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+    """This class contains the basic parser and search code. It defines
+    a parser that knows nothing about tag behavior except for the
+    following:
+
+      You can't close a tag without closing all the tags it encloses.
+      That is, "<foo><bar></foo>" actually means
+      "<foo><bar></bar></foo>".
+
+    [Another possible explanation is "<foo><bar /></foo>", but since
+    this class defines no SELF_CLOSING_TAGS, it will never use that
+    explanation.]
+
+    This class is useful for parsing XML or made-up markup languages,
+    or when BeautifulSoup makes an assumption counter to what you were
+    expecting."""
+
+    SELF_CLOSING_TAGS = {}
+    NESTABLE_TAGS = {}
+    RESET_NESTING_TAGS = {}
+    QUOTE_TAGS = {}
+    PRESERVE_WHITESPACE_TAGS = []
+
+    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda x: x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda x: '<!' + x.group(1) + '>')
+                      ]
+
+    ROOT_TAG_NAME = u'[document]'
+
+    HTML_ENTITIES = "html"
+    XML_ENTITIES = "xml"
+    XHTML_ENTITIES = "xhtml"
+    # TODO: This only exists for backwards-compatibility
+    ALL_ENTITIES = XHTML_ENTITIES
+
+    # Used when determining whether a text node is all whitespace and
+    # can be replaced with a single space. A text node that contains
+    # fancy Unicode spaces (usually non-breaking) should be left
+    # alone.
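+    # (Illustrative example: via endData() below, a text node of
+    # u"  \n  " collapses to a single newline, while u"\xa0" -- a
+    # non-breaking space -- survives untouched.)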
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
+         <br/> (No space between name of closing tag and tag close)
+         <! --Comment--> (Extraneous whitespace in declaration)
+
+        You can pass in a custom list of (RE object, replace method)
+        tuples to get Beautiful Soup to scrub your input the way you
+        want."""
+
+        self.parseOnlyThese = parseOnlyThese
+        self.fromEncoding = fromEncoding
+        self.smartQuotesTo = smartQuotesTo
+        self.convertEntities = convertEntities
+        # Set the rules for how we'll deal with the entities we
+        # encounter
+        if self.convertEntities:
+            # It doesn't make sense to convert encoded characters to
+            # entities even while you're converting entities to Unicode.
+            # Just convert it all to Unicode.
+            self.smartQuotesTo = None
+            if convertEntities == self.HTML_ENTITIES:
+                self.convertXMLEntities = False
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = True
+            elif convertEntities == self.XHTML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = False
+            elif convertEntities == self.XML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = False
+                self.escapeUnrecognizedEntities = False
+        else:
+            self.convertXMLEntities = False
+            self.convertHTMLEntities = False
+            self.escapeUnrecognizedEntities = False
+
+        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
+        SGMLParser.__init__(self)
+
+        if hasattr(markup, 'read'):        # It's a file-type object.
+            markup = markup.read()
+        self.markup = markup
+        self.markupMassage = markupMassage
+        try:
+            self._feed(isHTML=isHTML)
+        except StopParsing:
+            pass
+        self.markup = None                 # The markup can now be GCed
+
+    def convert_charref(self, name):
+        """This method fixes a bug in Python's SGMLParser."""
+        try:
+            n = int(name)
+        except ValueError:
+            return
+        if not 0 <= n <= 127 : # ASCII ends at 127, not 255
+            return
+        return self.convert_codepoint(n)
+
+    def _feed(self, inDocumentEncoding=None, isHTML=False):
+        # Convert the document to Unicode.
+        markup = self.markup
+        if isinstance(markup, unicode):
+            if not hasattr(self, 'originalEncoding'):
+                self.originalEncoding = None
+        else:
+            dammit = UnicodeDammit\
+                     (markup, [self.fromEncoding, inDocumentEncoding],
+                      smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
+            markup = dammit.unicode
+            self.originalEncoding = dammit.originalEncoding
+            self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
+        if markup:
+            if self.markupMassage:
+                if not hasattr(self.markupMassage, "__iter__"):
+                    self.markupMassage = self.MARKUP_MASSAGE
+                for fix, m in self.markupMassage:
+                    markup = fix.sub(m, markup)
+                # TODO: We get rid of markupMassage so that the
+                # soup object can be deepcopied later on. Some
+                # Python installations can't copy regexes. If anyone
+                # was relying on the existence of markupMassage, this
+                # might cause problems.
+                del(self.markupMassage)
+        self.reset()
+
+        SGMLParser.feed(self, markup)
+        # Close out any unfinished strings and close all the open tags.
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurance.
+                popTo = name
+                break
+            if (nestingResetTriggers is not None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers is None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s: %s" % (name, attrs)
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def _toStringSubclass(self, text, subclass):
+        """Adds a certain piece of text to the tree as a NavigableString
+        subclass."""
+        self.endData()
+        self.handle_data(text)
+        self.endData(subclass)
+
+    def handle_pi(self, text):
+        """Handle a processing instruction as a ProcessingInstruction
+        object, possibly one with a %SOUP-ENCODING% slot into which an
+        encoding will be plugged later."""
+        if text[:3] == "xml":
+            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
+        self._toStringSubclass(text, ProcessingInstruction)
+
+    def handle_comment(self, text):
+        "Handle comments as Comment objects."
+        self._toStringSubclass(text, Comment)
+
+    def handle_charref(self, ref):
+        "Handle character references as data."
+        if self.convertEntities:
+            data = unichr(int(ref))
+        else:
+            data = '&#%s;' % ref
+        self.handle_data(data)
+
+    def handle_entityref(self, ref):
+        """Handle entity references as data, possibly converting known
+        HTML and/or XML entity references to the corresponding Unicode
+        characters."""
+        data = None
+        if self.convertHTMLEntities:
+            try:
+                data = unichr(name2codepoint[ref])
+            except KeyError:
+                pass
+
+        if not data and self.convertXMLEntities:
+                data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
+
+        if not data and self.convertHTMLEntities and \
+            not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
+                # TODO: We've got a problem here. We're told this is
+                # an entity reference, but it's not an XML entity
+                # reference or an HTML entity reference. Nonetheless,
+                # the logical thing to do is to pass it through as an
+                # unrecognized entity reference.
+                #
+                # Except: when the input is "&carol;" this function
+                # will be called with input "carol". When the input is
+                # "AT&T", this function will be called with input
+                # "T". We have no way of knowing whether a semicolon
+                # was present originally, so we don't know whether
+                # this is an unknown entity or just a misplaced
+                # ampersand.
+                #
+                # The more common case is a misplaced ampersand, so I
+                # escape the ampersand and omit the trailing semicolon.
+                data = "&amp;%s" % ref
+        if not data:
+            # This case is different from the one above, because we
+            # haven't already gone through a supposedly comprehensive
+            # mapping of entities to Unicode characters. We might not
+            # have gone through any mapping at all. So the chances are
+            # very high that this is a real entity, and not a
+            # misplaced ampersand.
+            data = "&%s;" % ref
+        self.handle_data(data)
+
+    def handle_decl(self, data):
+        "Handle DOCTYPEs and the like as Declaration objects."
+        self._toStringSubclass(data, Declaration)
+
+    def parse_declaration(self, i):
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            data = self.rawdata[i+9:k]
+            j = k+3
+            self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (ie. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurance of
+      a <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurance
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+        but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table><tr></tr>
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+                    # Go through it again with the encoding information.
+                    newCharset = match.group(3)
+                    if newCharset and newCharset != self.originalEncoding:
+                        self.declaredHTMLEncoding = newCharset
+                        self._feed(self.declaredHTMLEncoding)
+                        raise StopParsing
+                    pass
+        tag = self.unknown_starttag("meta", attrs)
+        if tag and tagNeedsEncodingSubstitution:
+            tag.containsSubstitutions = True
+
+class StopParsing(Exception):
+    pass
+
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+    """The BeautifulSoup class is oriented towards skipping over
+    common HTML errors like unclosed tags. However, sometimes it makes
+    errors of its own. For instance, consider this fragment:
+
+     <b>Foo<b>Bar</b></b>
+
+    This is perfectly valid (if bizarre) HTML. However, the
+    BeautifulSoup class will implicitly close the first b tag when it
+    encounters the second 'b'. It will think the author wrote
+    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+    there's no real-world reason to bold something that's already
+    bold. When it encounters '</b></b>' it will close two more 'b'
+    tags, for a grand total of three tags closed instead of two. This
+    can throw off the rest of your document structure. The same is
+    true of a number of other tags, listed below.
+
+    It's much more common for someone to forget to close a 'b' tag
+    than to actually use nested 'b' tags, and the BeautifulSoup class
+    handles the common case. This class handles the not-so-common
+    case: where you can't believe someone wrote what they did, but
+    it's valid HTML and BeautifulSoup screwed up by assuming it
+    wouldn't be."""
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+     ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+      'big')
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
+
+    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+     I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+     I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class MinimalSoup(BeautifulSoup):
+    """The MinimalSoup class is for parsing HTML that contains
+    pathologically bad markup. It makes no assumptions about tag
+    nesting, but it does know which tags are self-closing, that
+    <script> tags contain Javascript and should not be parsed, that
+    META tags may contain encoding information, and so on."""
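A short, hedged sketch of how the three lenient parser classes above differ on the docstring's own example (the fragment and output annotations are illustrative):

    from fanficdownloader.BeautifulSoup import (BeautifulSoup,
        ICantBelieveItsBeautifulSoup, MinimalSoup)

    fragment = '<b>Foo<b>Bar</b></b>'
    # BeautifulSoup treats the second <b> as an unclosed-tag mistake:
    print BeautifulSoup(fragment)                   # <b>Foo</b><b>Bar</b>
    # ICantBelieveItsBeautifulSoup trusts the nesting as written:
    print ICantBelieveItsBeautifulSoup(fragment)    # <b>Foo<b>Bar</b></b>
    # MinimalSoup makes no nesting assumptions at all.
    print MinimalSoup(fragment)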
    +

    + FanFiction Downloader +

    + + +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
+ If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it; otherwise just leave these fields empty
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
+ A few things to know, which will make your life substantially easier:
+
+ 1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+ 3. Paste the URL of the first chapter of the fanfic, not the index page.
+ 4. Fics with a single chapter are not supported (you can just copy and paste them).
+ 5. Stories which are too long may not be downloaded correctly and the application will report a time-out error — this is a limitation currently imposed by Google App Engine on long-running activities.
+ 6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
+ 7. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in the future they will be deleted after 5 days to save space.
+ 8. If the Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the fic is too large to save in the database and you need to download it straight away.
+ 9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..c084a399 --- /dev/null +++ b/index.html @@ -0,0 +1,206 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    +

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

    +

For Amazon Kindle use Mobi output; for Sony Reader, Nook and iPad use ePub

    +

    To support new features, such as including story summaries, + the URL you need to use for some sites has changed. See below for example URLs for each site.

    +

    Or see your personal list of previously downloaded fanfics.

    +
    +
    + {{ error_message }} +
    + +
    + +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text + Mobi (Kindle) +
    +
    + +
    +

    Login and Password

    +
+
+ If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide
+ your credentials to download it; otherwise just leave these fields empty
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    + +
    + +
    + + {% else %} +
    +
    +

+ This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you
+ can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them.

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    +
    +
    fictionalley.org +
    Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    fanfiction.net +
    Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/5192986/5/. +
    fictionpress.com +
    Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    twilighted.net +
    Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    ficwad.com +
    Use the URL of any story chapter, such as +
    http://www.ficwad.com/story/75246. +
    harrypotterfanfiction.com +
    Use the URL of the story's chapter list, such as +
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
    potionsandsnitches.net +
    Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    mediaminer.org +
    Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
+
+
+ A few additional things to know, which will make your life substantially easier:
+
+ 1. First thing to know: I do not use your login and password. In fact, all I know about you is your ID – your password is verified by Google and is absolutely, totally unknown to anyone but you.
+ 2. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 3. Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org and twilighted.net. fanficauthors.net and tthfanfic.org offer native ePub functionality.
+ 4. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
+ 5. One-shots, fics with a single chapter, are now supported.
+ 6. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section.
+ 7. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep Google happy about the app not going over its storage limit).
+ 8. If the Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the fic is too large to save in the database and you need to download it straight away.
+ 9. If you see funny characters in a downloaded Plain Text file, make sure you choose the UTF-8 text file encoding and not something else.
+ 10. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com, or, even better, write an email to our Google Group. I also encourage you to join it so you will find out about the latest updates and fixes as soon as possible.
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + +
    + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..bbed2dff --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+ all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "
    ", "
    " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + + + + + {{yourfile}} + + +

    +
+ Hi, {{ nickname }}! These are the fanfics you've downloaded previously.
    +
    + +
    + {% for fic in fics %} +

    {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
    {{ fic.url }}

    + {% endfor %} +
    + + + + + +
    + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
+ >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. 
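+
+    A small sketch of the compact ``separators`` described above::
+
+        >>> from StringIO import StringIO
+        >>> io = StringIO()
+        >>> dump([1, 2, {'3': 4}], io, separators=(',', ':'))
+        >>> io.getvalue()
+        '[1,2,{"3":4}]'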
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
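+
+    A small sketch of the ``parse_int`` hook described above::
+
+        >>> loads('[1, 2, 3]', parse_int=float)
+        [1.0, 2.0, 3.0]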
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? 
zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
+    {NULL}
+};
+
+typedef struct _PyEncoderObject {
+    PyObject_HEAD
+    PyObject *markers;
+    PyObject *defaultfn;
+    PyObject *encoder;
+    PyObject *indent;
+    PyObject *key_separator;
+    PyObject *item_separator;
+    PyObject *sort_keys;
+    PyObject *skipkeys;
+    int fast_encode;
+    int allow_nan;
+} PyEncoderObject;
+
+static PyMemberDef encoder_members[] = {
+    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
+    {NULL}
+};
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+void init_speedups(void);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *const);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+
+#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+#define MIN_EXPANSION 6
+#ifdef Py_UNICODE_WIDE
+#define MAX_EXPANSION (2 * MIN_EXPANSION)
+#else
+#define MAX_EXPANSION MIN_EXPANSION
+#endif
+
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    /* PyObject to Py_ssize_t converter: 0 on failure, 1 on success */
+    *size_ptr = PyInt_AsSsize_t(o);
+    if (*size_ptr == -1 && PyErr_Occurred())
+        return 0;
+    return 1;
+}
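+
+/* _convertPyInt_AsSsize_t follows the "O&" converter protocol used by
+   PyArg_ParseTuple (nonzero return means success); the companion
+   _convertPyInt_FromSsize_t below is the Py_BuildValue-side counterpart.
+   A hypothetical usage sketch (illustrative only, assuming an `args`
+   tuple holding a string and an index):
+
+       PyObject *pystr;
+       Py_ssize_t idx;
+       if (!PyArg_ParseTuple(args, "OO&:example",
+                             &pystr, _convertPyInt_AsSsize_t, &idx))
+           return NULL;
+*/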
+ +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + 
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if 
(pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 
5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. 
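+    (Note: unlike scanstring_str there is no encoding argument here; the
+    input is already decoded unicode, so chunks never need re-decoding.)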
+ if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 
10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. 
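+    (Note: this function and scan_once_str are mutually recursive; each
+    value below is read by calling back into scan_once_str.)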
+ idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
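+    (Note: this is the unicode twin of _parse_object_str above; the two
+    bodies differ only in character type and string helpers.)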
+
+    Returns a new PyObject (usually a dict, but object_hook can change that)
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyDict_New();
+    PyObject *key = NULL;
+    int strict = PyObject_IsTrue(s->strict);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after { */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the object is non-empty */
+    if (idx <= end_idx && str[idx] != '}') {
+        while (idx <= end_idx) {
+            /* read key */
+            if (str[idx] != '"') {
+                raise_errmsg("Expecting property name", pystr, idx);
+                goto bail;
+            }
+            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
+            if (key == NULL)
+                goto bail;
+            idx = next_idx;
+
+            /* skip whitespace between key and : delimiter, read :, skip whitespace */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+            if (idx > end_idx || str[idx] != ':') {
+                raise_errmsg("Expecting : delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* read any JSON term */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyDict_SetItem(rval, key, val) == -1)
+                goto bail;
+
+            Py_CLEAR(key);
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace before } or , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the object is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == '}') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , delimiter */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be '}' */
+    if (idx > end_idx || str[idx] != '}') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+
+    /* if object_hook is not None: rval = object_hook(rval) */
+    if (s->object_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(rval);
+        rval = val;
+        val = NULL;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(key);
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON array from PyString pystr.
+    idx is the index of the first character after the opening bracket.
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the closing bracket.
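+    (Note: arrays reuse the same delimiter state machine as objects,
+    minus the key handling: read a term, then expect ']' or ','.)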
+
+    Returns a new PyList
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term */
+            val = scan_once_str(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON array from PyUnicode pystr.
+    idx is the index of the first character after the opening bracket.
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the closing bracket.
+
+    Returns a new PyList
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON constant.
+    constant is the constant string that was found
+    ("NaN", "Infinity", "-Infinity").
+    idx is the index of the first character of the constant
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the constant.
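+    (Note: NaN, Infinity and -Infinity are extensions beyond strict JSON;
+    scan_once dispatches here only from its 'N', 'I' and '-' cases.)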
+ + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */
+        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyString_FromStringAndSize(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
+        }
+    }
+    else {
+        /* parse as an int using a fast path if available, otherwise call user defined method */
+        if (s->parse_int != (PyObject *)&PyInt_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+        }
+        else {
+            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+        }
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON number from PyUnicode pystr.
+    idx is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the number.
+
+    Returns a new PyObject representation of that number:
+    PyInt, PyLong, or PyFloat.
+    May return other types if parse_int or parse_float are set
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        idx++;
+        if (idx > end_idx) {
+            PyErr_SetNone(PyExc_StopIteration);
+            return NULL;
+        }
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float.
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. 
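+    (strictly: after whatever term was read, not only numbers)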
+ + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 
0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON 
representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + 
empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto 
bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, 
NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
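+
+    For example:
+
+        >>> py_scanstring('"foo"', 1)
+        (u'foo', 5)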
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not isinstance(content, unicode):
+                content = unicode(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at" % (terminator,)
+                #msg = "Invalid control character {0!r} at".format(terminator)
+                raise ValueError(errmsg(msg, s, end))
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\escape: " + repr(esc)
+                raise ValueError(errmsg(msg, s, end))
+            end += 1
+        else:
+            # Unicode escape sequence
+            esc = s[end + 1:end + 5]
+            next_end = end + 5
+            if len(esc) != 4:
+                msg = "Invalid \\uXXXX escape"
+                raise ValueError(errmsg(msg, s, end))
+            uni = int(esc, 16)
+            # Check for surrogate pair on UCS-4 systems
+            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                if not s[end + 5:end + 7] == '\\u':
+                    raise ValueError(errmsg(msg, s, end))
+                esc2 = s[end + 7:end + 11]
+                if len(esc2) != 4:
+                    raise ValueError(errmsg(msg, s, end))
+                uni2 = int(esc2, 16)
+                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                next_end += 6
+            char = unichr(uni)
+            end = next_end
+        # Append the unescaped character
+        _append(char)
+    return u''.join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    pairs = {}
+    # Use a slice to prevent IndexError from being raised, the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
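+        # (The whitespace regex only runs when the character right after
+        # the key is not the colon itself.)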
+ if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). 
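+
+        A sketch of the idea (added for illustration; the lambda is made up)::
+
+            import simplejson as json
+            json.loads('{"a": 1}', object_hook=lambda d: sorted(d.keys()))
+            # -> [u'a']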
+
+        ``parse_float``, if specified, will be called with the string
+        of every JSON float to be decoded.  By default this is equivalent to
+        float(num_str).  This can be used to use another datatype or parser
+        for JSON floats (e.g. decimal.Decimal).
+
+        ``parse_int``, if specified, will be called with the string
+        of every JSON int to be decoded.  By default this is equivalent to
+        int(num_str).  This can be used to use another datatype or parser
+        for JSON integers (e.g. float).
+
+        ``parse_constant``, if specified, will be called with one of the
+        following strings: -Infinity, Infinity, NaN.
+        This can be used to raise an exception if invalid JSON numbers
+        are encountered.
+
+        """
+        self.encoding = encoding
+        self.object_hook = object_hook
+        self.parse_float = parse_float or float
+        self.parse_int = parse_int or int
+        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+        self.strict = strict
+        self.parse_object = JSONObject
+        self.parse_array = JSONArray
+        self.parse_string = scanstring
+        self.scan_once = make_scanner(self)
+
+    def decode(self, s, _w=WHITESPACE.match):
+        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+        instance containing a JSON document).
+
+        """
+        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+        end = _w(s, end).end()
+        if end != len(s):
+            raise ValueError(errmsg("Extra data", s, end, len(s)))
+        return obj
+
+    def raw_decode(self, s, idx=0):
+        """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
+        with a JSON document) and return a 2-tuple of the Python
+        representation and the index in ``s`` where the document ended.
+
+        This can be used to decode a JSON document from a string that may
+        have extraneous data at the end.
+
+        """
+        try:
+            obj, end = self.scan_once(s, idx)
+        except StopIteration:
+            raise ValueError("No JSON object could be decoded")
+        return obj, end
diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc
GIT binary patch
literal 11292
[base85-encoded compiled bytecode omitted; the blob's terminator, the hunk
header for simplejson/encoder.py, and the opening of that file were lost in
extraction -- the text resumes inside py_encode_basestring_ascii's
surrogate-pair branch]
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
new file mode 100644
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                s2 = 0xdc00 | (n & 0x3ff)
+                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
+
+class JSONEncoder(object):
+    """Extensible JSON encoder for Python data structures.
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible; otherwise it should call the superclass implementation
+    (to raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then attempting to encode keys that are not
+        str, int, long, float or None raises a TypeError.  If skipkeys is
+        true, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped.  If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such.  This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, attempting to encode such floats will raise a ValueError.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level.  An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be a (item_separator, key_separator)
+        tuple.  The default is (', ', ': ').  To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized.  It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+ + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. 
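+            # (Added note: float('nan') != float('nan') is True in Python,
+            # so the o != o test below catches NaN without math.isnan,
+            # which only exists on Python 2.6 and later.)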
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
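+            # (Added example: dumps({True: False}) emits '{"true": false}'
+            # and dumps({2: 3.0}) emits '{"2": 3.0}', matching test_dump
+            # later in this patch.)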
+            elif isinstance(key, float):
+                key = _floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif _skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield _encoder(key)
+            yield _key_separator
+            if isinstance(value, basestring):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, (int, long)):
+                yield str(value)
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            else:
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode(o, _current_indent_level):
+        if isinstance(o, basestring):
+            yield _encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, (int, long)):
+            yield str(o)
+        elif isinstance(o, float):
+            yield _floatstr(o)
+        elif isinstance(o, (list, tuple)):
+            for chunk in _iterencode_list(o, _current_indent_level):
+                yield chunk
+        elif isinstance(o, dict):
+            for chunk in _iterencode_dict(o, _current_indent_level):
+                yield chunk
+        else:
+            if markers is not None:
+                markerid = id(o)
+                if markerid in markers:
+                    raise ValueError("Circular reference detected")
+                markers[markerid] = o
+            o = _default(o)
+            for chunk in _iterencode(o, _current_indent_level):
+                yield chunk
+            if markers is not None:
+                del markers[markerid]
+
+    return _iterencode
diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b
GIT binary patch
literal 13938
[base85-encoded compiled bytecode omitted]
literal 0
HcmV?d00001
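
A usage sketch for the encoder above (a hedged addition, not part of the
patch itself): ``iterencode`` is the streaming entry point that the one-shot
``encode`` wraps.

    import simplejson as json

    enc = json.JSONEncoder(indent=2, sort_keys=True)
    for chunk in enc.iterencode({'a': [1, 2], 'b': None}):
        print repr(chunk)  # pieces of the JSON document, produced lazily
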
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
new file mode 100644
index 00000000..adbc6ec9
--- /dev/null
+++ b/simplejson/scanner.py
@@ -0,0 +1,65 @@
+"""JSON token scanner
+"""
+import re
+try:
+    from simplejson._speedups import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None
+
+__all__ = ['make_scanner']
+
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
+
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
+
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
+
+    return _scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef
GIT binary patch
literal 2340
[base85-encoded compiled bytecode omitted]
literal 0
HcmV?d00001
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
new file mode 100644
index 00000000..17c97963
--- /dev/null
+++ b/simplejson/tests/__init__.py
@@ -0,0 +1,23 @@
+import unittest
+import doctest
+
+def additional_tests():
+    import simplejson
+    import simplejson.encoder
+    import simplejson.decoder
+    suite = unittest.TestSuite()
+    for mod in (simplejson, simplejson.encoder, simplejson.decoder):
+        suite.addTest(doctest.DocTestSuite(mod))
+    suite.addTest(doctest.DocFileSuite('../../index.rst'))
+    return suite
+
+def main():
+    suite = additional_tests()
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
+
+if __name__ == '__main__':
+    import os
+    import sys
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+    main()
diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py
new file mode 100644
index 00000000..af6463d6
--- /dev/null
+++ b/simplejson/tests/test_check_circular.py
@@ -0,0 +1,30 @@
+from unittest import TestCase
+import simplejson as json
+
+def default_iterable(obj):
+    return list(obj)
+
+class TestCheckCircular(TestCase):
+    def test_circular_dict(self):
+        dct = {}
+        dct['a'] = dct
+        self.assertRaises(ValueError, json.dumps, dct)
+
+    def test_circular_list(self):
+        lst = []
+        lst.append(lst)
+        self.assertRaises(ValueError, json.dumps, lst)
+
+    def test_circular_composite(self):
+        dct2 = {}
+        dct2['a'] = []
+        dct2['a'].append(dct2)
+        self.assertRaises(ValueError, json.dumps, dct2)
+
+    def test_circular_default(self):
+        json.dumps([set()], default=default_iterable)
+        self.assertRaises(TypeError, json.dumps, [set()])
+
+    def test_circular_off_default(self):
+        json.dumps([set()], default=default_iterable, check_circular=False)
+        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
new file mode 100644
index 00000000..1cd701d4
--- /dev/null
+++ b/simplejson/tests/test_decode.py
@@ -0,0 +1,22 @@
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDecode(TestCase):
+    def test_decimal(self):
+        rval = json.loads('1.1', parse_float=decimal.Decimal)
+        self.assert_(isinstance(rval, decimal.Decimal))
+        self.assertEquals(rval, decimal.Decimal('1.1'))
+
+    def test_float(self):
+        rval = json.loads('1', parse_int=float)
+        self.assert_(isinstance(rval, float))
+        self.assertEquals(rval, 1.0)
+
+    def test_decoder_optimizations(self):
+        # Several optimizations were made that skip over calls to
+        # the whitespace regex, so this test is designed to try to
+        # exercise the uncommon cases. The array cases are already covered.
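+        # (Added note: the first pair below has spaces around ':' and ',',
+        # forcing the regex path; the second pair is compact and stays on
+        # the fast path.)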
+ rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"}) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 00000000..139e42bf --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEquals( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 00000000..4de37cf4 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,21 @@ +from unittest import TestCase +from cStringIO import StringIO + +import simplejson as json + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEquals(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEquals(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEquals(json.dumps( + {True: False, False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEquals(json.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 00000000..7128495f --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 00000000..002eea08 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json 
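+
+# (A hedged sketch of the expected failure mode -- exact message text
+# differs between the C and pure-Python scanners:
+#
+#     json.loads('["Unclosed array"')
+#     ValueError: Expecting , delimiter: line 1 column 18 (char 18)
+# )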
+ +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase + +import simplejson as json +import textwrap + +class 
TestIndent(TestCase):
+    def test_indent(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ],
+          [
+            "whoops"
+          ],
+          [],
+          "d-shtaeou",
+          "d-nthiouh",
+          "i-vhbjkhnth",
+          {
+            "nifty": 87
+          },
+          {
+            "field": "yes",
+            "morefield": false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
new file mode 100644
index 00000000..c3d6302d
--- /dev/null
+++ b/simplejson/tests/test_pass1.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "": 23456789012E666,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[ ],
+        "object":{ },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+class TestPass1(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
+        try:
+            json.dumps(res, allow_nan=False)
+        except ValueError:
+            pass
+        else:
+            self.fail("23456789012E666 should be out of range")
diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py
new file mode 100644
index 00000000..de4ee00b
--- /dev/null
+++ b/simplejson/tests/test_pass2.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass2.json
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+class TestPass2(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py
new file mode 100644
index 00000000..f591aba9
--- /dev/null
+++ b/simplejson/tests/test_pass3.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass3.json
+JSON = r'''
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+    }
+}
+'''
+
+class TestPass3(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py
new file mode 100644
index 00000000..97422a66
--- /dev/null
+++ b/simplejson/tests/test_recursion.py
@@ -0,0 +1,67 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class JSONTestObject:
+    pass
+
+
+class RecursiveJSONEncoder(json.JSONEncoder):
+    recurse = False
+    def default(self, o):
+        if o is JSONTestObject:
+            if self.recurse:
+                return [JSONTestObject]
+            else:
+                return 'JSONTestObject'
+        # delegate to the base implementation (which raises TypeError);
+        # the unbound call must pass self explicitly
+        return json.JSONEncoder.default(self, o)
+
+
+class TestRecursion(TestCase):
+    def test_listrecursion(self):
+        x = []
+        x.append(x)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on list recursion")
+        x = []
+        y = [x]
+        x.append(y)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on alternating list recursion")
+        y = []
+        x = [y, y]
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_dictrecursion(self):
+        x = {}
+        x["test"] = x
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on dict recursion")
+        x = {}
+        y = {"a": x, "b": x}
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_defaultrecursion(self):
+        enc = RecursiveJSONEncoder()
+        self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
+        enc.recurse = True
+        try:
+            enc.encode(JSONTestObject)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on default recursion")
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
new file mode 100644
index 00000000..b08dec71
--- /dev/null
+++ b/simplejson/tests/test_scanstring.py
@@ -0,0 +1,111 @@
+import sys
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+import simplejson.decoder
+
+class TestScanString(TestCase):
+    def test_py_scanstring(self):
+        self._test_scanstring(simplejson.decoder.py_scanstring)
+
+    def test_c_scanstring(self):
+        if not simplejson.decoder.c_scanstring:
+            return
+        self._test_scanstring(simplejson.decoder.c_scanstring)
+
+    def _test_scanstring(self, scanstring):
+        self.assertEquals(
+            scanstring('"z\\ud834\\udd20x"', 1, None, True),
+            (u'z\U0001d120x', 16))
+
+        if sys.maxunicode == 65535:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 6))
+        else:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 5))
+
+        self.assertEquals(
+            scanstring('"\\u007b"', 1, None, True),
+            (u'{', 8))
+
+        self.assertEquals(
+            scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
+            (u'A JSON payload should be an object or array, not a string.', 60))
+
+        self.assertEquals(
+            scanstring('["Unclosed array"', 2, None, True),
+            (u'Unclosed array', 17))
+
+        self.assertEquals(
+            scanstring('["extra comma",]', 2, None, True),
+            (u'extra comma', 14))
+
+        self.assertEquals(
+            scanstring('["double extra comma",,]', 2, None, True),
+            (u'double extra comma', 21))
+
+        self.assertEquals(
+            scanstring('["Comma after the close"],', 2, None, True),
+            (u'Comma after the close', 24))
+
+        self.assertEquals(
+            scanstring('["Extra close"]]', 2, None, True),
+            (u'Extra close', 14))
+
+        self.assertEquals(
+            scanstring('{"Extra comma": true,}', 2, None, True),
+            (u'Extra 
comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, 
'["\\u03b1\\u03a9"]')
+
+    def test_encoding5(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps(u, ensure_ascii=False)
+        self.assertEquals(j, u'"%s"' % (u,))
+
+    def test_encoding6(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps([u], ensure_ascii=False)
+        self.assertEquals(j, u'["%s"]' % (u,))
+
+    def test_big_unicode_encode(self):
+        u = u'\U0001d120'
+        self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
+        self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
+
+    def test_big_unicode_decode(self):
+        u = u'z\U0001d120x'
+        self.assertEquals(json.loads('"' + u + '"'), u)
+        self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
+
+    def test_unicode_decode(self):
+        for i in range(0, 0xd7ff):
+            u = unichr(i)
+            s = '"\\u%04x"' % (i,)
+            self.assertEquals(json.loads(s), u)
+
+    def test_default_encoding(self):
+        self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+            {'a': u'\xe9'})
+
+    def test_unicode_preservation(self):
+        self.assertEquals(type(json.loads(u'""')), unicode)
+        self.assertEquals(type(json.loads(u'"a"')), unicode)
+        self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
\ No newline at end of file
diff --git a/simplejson/tool.py b/simplejson/tool.py
new file mode 100644
index 00000000..90443317
--- /dev/null
+++ b/simplejson/tool.py
@@ -0,0 +1,37 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import simplejson
+
+def main():
+    if len(sys.argv) == 1:
+        infile = sys.stdin
+        outfile = sys.stdout
+    elif len(sys.argv) == 2:
+        infile = open(sys.argv[1], 'rb')
+        outfile = sys.stdout
+    elif len(sys.argv) == 3:
+        infile = open(sys.argv[1], 'rb')
+        outfile = open(sys.argv[2], 'wb')
+    else:
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+    try:
+        obj = simplejson.load(infile)
+    except ValueError, e:
+        raise SystemExit(e)
+    simplejson.dump(obj, outfile, sort_keys=True, indent=4)
+    outfile.write('\n')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif
new file mode 100644
index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c
GIT binary patch
literal 10819
[base85-encoded GIF data omitted]
literal 0
HcmV?d00001
diff --git a/static/favicon.ico b/static/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8
GIT binary patch
literal 21792
[base85-encoded icon data omitted]
literal 0
HcmV?d00001

From [commit id garbled in source] Mon Sep 17 00:00:00 2001
Date: Thu, 2 Dec 2010 17:36:52 -0600
Subject: [PATCH 095/482] Added tag fanficdownloader-0.4 for changeset
 f23fd0e4cbc3

From e674f7c7df534e0b6719564732b1cb683225c370 Mon Sep 17 00:00:00 2001
From: wsuetholz
Date: Sun, 5 Dec 2010 20:52:13 -0600
Subject: [PATCH 096/482] There was still a 2 second sleep between chapters
 for the fictionpress adapter. Keep in mind that if you do a lot of
 downloading from fanfiction.net or fictionpress.com, it is possible that
 your IP address could be blocked without the sleep in place.

---
 fanficdownloader/fpcom.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py
index 9aeef1ab..9fa05c52 100644
--- a/fanficdownloader/fpcom.py
+++ b/fanficdownloader/fpcom.py
@@ -246,7 +246,7 @@ class FPCom(FanfictionSiteAdapter):
         return urls
 
     def getText(self, url):
-        time.sleep( 2.0 )
+        # time.sleep( 2.0 )
         data = ''
         try:
             data = self.fetchUrl(url)

From 48b882c745f0db6d846384f94f6ce1b36c61c932 Mon Sep 17 00:00:00 2001
From: wsuetholz
Date: Sun, 5 Dec 2010 21:21:44 -0600
Subject: [PATCH 097/482] The 2 second sleep was in Mediaminer as well.

---
 fanficdownloader/mediaminer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py
index a5929cab..660d7bd0 100644
--- a/fanficdownloader/mediaminer.py
+++ b/fanficdownloader/mediaminer.py
@@ -284,7 +284,7 @@ class MediaMiner(FanfictionSiteAdapter):
         return urls
 
     def getText(self, url):
-        time.sleep( 2.0 )
+        # time.sleep( 2.0 )
        logging.debug('url=%s' % url)
         data = ''
         try:

From 50b057303374ee3ec38a14ce1b1fcfc7e9b532ea Mon Sep 17 00:00:00 2001
From: retiefjimm
Date: Sun, 5 Dec 2010 21:38:16 -0600
Subject: [PATCH 098/482] Add status (In-Progress or Completed) to metadata as
 a Subject tag.
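(Editor's sketch, not code from the patch: addSubject()/getSubjects() are the
adapter methods this series already uses, and the dc:subject line is a
hand-written illustration of where the status string ends up in the EPUB's
content.opf.)

    adapter.addSubject(adapter.storyStatus)  # 'In-Progress' or 'Completed'
    for subj in adapter.getSubjects():
        # each collected subject becomes a dc:subject element in content.opf
        print '<dc:subject>%s</dc:subject>' % subj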
--- fanficdownloader/ffnet.py | 4 +++- fanficdownloader/ficwad.py | 2 +- fanficdownloader/fpcom.py | 3 ++- fanficdownloader/hpfiction.py | 1 + fanficdownloader/mediaminer.py | 1 + fanficdownloader/potionsNsnitches.py | 3 ++- fanficdownloader/readme.txt | 4 ++-- fanficdownloader/twilighted.py | 1 + 8 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index d156f9fa..c4e1145d 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -190,6 +190,7 @@ class FFNet(FanfictionSiteAdapter): self.storyStatus = 'Completed' else: self.storyStatus = 'In-Progress' + s2 = bs.BeautifulStoneSoup(l) self.storyRating = unicode(s2.a.string).strip() logging.debug('self.storyRating=%s' % self.storyRating) @@ -265,7 +266,8 @@ class FFNet(FanfictionSiteAdapter): dateus = self._getVarValue (l) self.storyUpdated = datetime.datetime(*time.strptime ( dateus, "'%m-%d-%y'" )[0:5]) logging.debug('self.storyUpdated=%s' % self.storyUpdated.strftime("%Y-%m-%dT%I:%M:%S")) - + + self.addSubject(self.storyStatus) if len(urls) <= 0: # no chapters found, try url by itself. urls.append((self.url,self.storyName)) diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index 9cb353ce..aa8db7fa 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -159,7 +159,7 @@ class FicWad(FanfictionSiteAdapter): self.numWords=ss.replace('words','').replace(' ','') logging.debug('self.numWords=%s' % self.numWords) - + self.addSubject(self.storyStatus) logging.debug('Story "%s" by %s' % (self.storyName, self.authorName)) result = [] diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index 9fa05c52..b7808132 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -242,7 +242,8 @@ class FPCom(FanfictionSiteAdapter): self._processInfoLine (tdas[5]) self.authorURL = 'http://' + self.host + '/u/' + self.authorId - + self.addSubject(self.storyStatus) + return urls def getText(self, url): diff --git a/fanficdownloader/hpfiction.py b/fanficdownloader/hpfiction.py index aeda7d36..9107b705 100644 --- a/fanficdownloader/hpfiction.py +++ b/fanficdownloader/hpfiction.py @@ -227,6 +227,7 @@ class HPFiction(FanfictionSiteAdapter): if title != "Story Index": urls.append((url,title)) + self.addSubject(self.storyStatus) return urls def getText(self, url): diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py index 660d7bd0..365131d5 100644 --- a/fanficdownloader/mediaminer.py +++ b/fanficdownloader/mediaminer.py @@ -280,6 +280,7 @@ class MediaMiner(FanfictionSiteAdapter): self.numChapters = unicode(numchapters) logging.debug('self.numChapters=%s' % self.numChapters) #logging.debug('urls=%s' % urls) + self.addSubject(self.storyStatus) return urls diff --git a/fanficdownloader/potionsNsnitches.py b/fanficdownloader/potionsNsnitches.py index 43dc47a7..81cfec40 100644 --- a/fanficdownloader/potionsNsnitches.py +++ b/fanficdownloader/potionsNsnitches.py @@ -313,7 +313,8 @@ class PotionsNSnitches(FanfictionSiteAdapter): if (self.storyName is None or len(self.storyName) == 0) and self.storyId == '0': logging.error('self.storyName is empty!! 
Exitting!') exit(1) - + + self.addSubject(self.storyStatus) return result def getText(self, url): diff --git a/fanficdownloader/readme.txt b/fanficdownloader/readme.txt index 108eff20..c8b2c8e9 100644 --- a/fanficdownloader/readme.txt +++ b/fanficdownloader/readme.txt @@ -1,10 +1,10 @@ To use, do: -python downloader.py (epub|html) +python downloader.py (epub|html|text|mobi) Eg: python downloader.py http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo epub -This tool uses Python 2.5.2, but should work with newer versions. +This tool uses Python 2.5.2, but should work with newer versions of Python. diff --git a/fanficdownloader/twilighted.py b/fanficdownloader/twilighted.py index f3b72e3c..7d9ca430 100644 --- a/fanficdownloader/twilighted.py +++ b/fanficdownloader/twilighted.py @@ -267,6 +267,7 @@ class Twilighted(FanfictionSiteAdapter): logging.debug('Skipped Label \"%s\" Value \"%s\"' % (strs[ii], strs[ii+1])) ii = ii+2 + self.addSubject(self.storyStatus) return result def getText(self, url): From f7d94105ee70a5e0589ce8a9078360cdcfd0853a Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Sun, 5 Dec 2010 21:40:04 -0600 Subject: [PATCH 099/482] Correct example URL. --- index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.html b/index.html index c084a399..1fc32eb4 100644 --- a/index.html +++ b/index.html @@ -100,7 +100,7 @@
    fanfiction.net
    Use the URL of any story chapter, with or without story title such as
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or
-    http://www.fanfiction.net/s/5192986/5/.
+    http://www.fanfiction.net/s/2345466/3/.
    fictionpress.com
    Use the URL of any story chapter, such as
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or From e05749678eb80c43211b99c34d456159d556c901 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Mon, 6 Dec 2010 15:03:07 -0600 Subject: [PATCH 100/482] Move status->subject into output.py as per Bill's suggestion. --- fanficdownloader/ffnet.py | 1 - fanficdownloader/ficwad.py | 1 - fanficdownloader/fpcom.py | 1 - fanficdownloader/hpfiction.py | 1 - fanficdownloader/mediaminer.py | 1 - fanficdownloader/output.py | 2 ++ fanficdownloader/potionsNsnitches.py | 1 - fanficdownloader/twilighted.py | 1 - 8 files changed, 2 insertions(+), 7 deletions(-) diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index c4e1145d..43253042 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -267,7 +267,6 @@ class FFNet(FanfictionSiteAdapter): self.storyUpdated = datetime.datetime(*time.strptime ( dateus, "'%m-%d-%y'" )[0:5]) logging.debug('self.storyUpdated=%s' % self.storyUpdated.strftime("%Y-%m-%dT%I:%M:%S")) - self.addSubject(self.storyStatus) if len(urls) <= 0: # no chapters found, try url by itself. urls.append((self.url,self.storyName)) diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index aa8db7fa..2f8fcdf4 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -159,7 +159,6 @@ class FicWad(FanfictionSiteAdapter): self.numWords=ss.replace('words','').replace(' ','') logging.debug('self.numWords=%s' % self.numWords) - self.addSubject(self.storyStatus) logging.debug('Story "%s" by %s' % (self.storyName, self.authorName)) result = [] diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index b7808132..7806af5c 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -242,7 +242,6 @@ class FPCom(FanfictionSiteAdapter): self._processInfoLine (tdas[5]) self.authorURL = 'http://' + self.host + '/u/' + self.authorId - self.addSubject(self.storyStatus) return urls diff --git a/fanficdownloader/hpfiction.py b/fanficdownloader/hpfiction.py index 9107b705..aeda7d36 100644 --- a/fanficdownloader/hpfiction.py +++ b/fanficdownloader/hpfiction.py @@ -227,7 +227,6 @@ class HPFiction(FanfictionSiteAdapter): if title != "Story Index": urls.append((url,title)) - self.addSubject(self.storyStatus) return urls def getText(self, url): diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py index 365131d5..660d7bd0 100644 --- a/fanficdownloader/mediaminer.py +++ b/fanficdownloader/mediaminer.py @@ -280,7 +280,6 @@ class MediaMiner(FanfictionSiteAdapter): self.numChapters = unicode(numchapters) logging.debug('self.numChapters=%s' % self.numChapters) #logging.debug('urls=%s' % urls) - self.addSubject(self.storyStatus) return urls diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index da7503e1..46b3d7e9 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -382,6 +382,8 @@ class EPubFanficWriter(FanficWriter): # opf = open(opfFilePath, 'w') self._writeFile(opfFilePath, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName, self.adapter.getLanguageId(), published, created, updated, calibre, description)) + if self.adapter.storyStatus != 'Unknown': + self.adapter.addSubject(self.adapter.storyStatus) i = 0 subjs = [] subjs = self.adapter.getSubjects() diff --git a/fanficdownloader/potionsNsnitches.py b/fanficdownloader/potionsNsnitches.py index 81cfec40..81bc0b51 100644 --- a/fanficdownloader/potionsNsnitches.py +++ b/fanficdownloader/potionsNsnitches.py @@ -314,7 +314,6 @@ class 
PotionsNSnitches(FanfictionSiteAdapter): logging.error('self.storyName is empty!! Exitting!') exit(1) - self.addSubject(self.storyStatus) return result def getText(self, url): diff --git a/fanficdownloader/twilighted.py b/fanficdownloader/twilighted.py index 7d9ca430..f3b72e3c 100644 --- a/fanficdownloader/twilighted.py +++ b/fanficdownloader/twilighted.py @@ -267,7 +267,6 @@ class Twilighted(FanfictionSiteAdapter): logging.debug('Skipped Label \"%s\" Value \"%s\"' % (strs[ii], strs[ii+1])) ii = ii+2 - self.addSubject(self.storyStatus) return result def getText(self, url): From d95c72f4477b44356fa584daf920f569add066ea Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Wed, 8 Dec 2010 17:40:10 -0600 Subject: [PATCH 101/482] Split info line in ffnet by ' - ' instead of just '-' so Sci-Fi doesn't become just 'Sci'. fpcom already does. --- fanficdownloader/ffnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index 43253042..8925f517 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -195,7 +195,7 @@ class FFNet(FanfictionSiteAdapter): self.storyRating = unicode(s2.a.string).strip() logging.debug('self.storyRating=%s' % self.storyRating) logging.debug('s2.a=%s' % s2.a) - s3 = l.split('-') + s3 = l.split(' - ') logging.debug('s3=%s' % s3) if len(s3) > 0: if s3[1].find("Reviews: around lots of the meta data values. + + # characters are given 'ln, fn'. Need to parse out + # separately. Of course, I only realized *after* doing this + # that output.py isn't actually doing anything with the + # characters... + for a in contentdiv.findAll('a'): + if a['href'].startswith('browse.php?type=characters'): + name=a.text + if a.text.find(', ') > -1: + names=a.text.split(', ') + names.reverse() + name=' '.join(names) + self.addCharacter(name) + + contentdivstring = contentdiv.__str__('utf8') + labeledlines = contentdivstring.strip().split('') # eats the tags. + metadata = dict() + for labeledline in labeledlines: + labeledline = re.sub(r'<[^>]+>','',labeledline) + (label,sep,value)=labeledline.strip().partition(':') # a bit like split, but splits on first separator. + metadata[label.strip()]=value.strip() + #print label+"->"+value + + self.storyDescription = metadata['Summary'] + self.genre = metadata['Genre'] + for genre in self.genre.split(", "): + self.addSubject(genre) + self.category = metadata['Categories'] + for category in self.category.split(", "): + self.addSubject(category) + if metadata['Completed'] == "No": + self.storyStatus = 'In-Progress' + else: + self.storyStatus = 'Completed' + + self.storyRating = metadata['Rated'] + self.storySeries = metadata['Series'] + self.numChapters = metadata['Chapters'] + self.numWords = metadata['Word count'] + self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(metadata['Published'], "%m/%d/%Y"))) + self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(metadata['Updated'], "%m/%d/%Y"))) + + return result + + def getText(self, url): + if url.find('http://') == -1: + url = 'http://' + self.host + '/' + url + + logging.debug('Getting data from: %s' % url) + + data = '' + try: + data = self.opener.open(url).read() + except Exception, e: + data = '' + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + if data is None: + raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" 
% url) + + soup = None + try: + # I really wish I knew why adastra needs the selfClosingTags to make
    work, but ficwad doesn't. + soup = bs.BeautifulStoneSoup(data, convertEntities=bs.BeautifulStoneSoup.HTML_ENTITIES, selfClosingTags=('br','hr')) + except: + logging.info("Failed to decode: <%s>" % data) + raise FailedToDownload("Error downloading Chapter: %s! Problem decoding page!" % url) + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return div.__str__('utf8') + + +class Adastrafanfic_UnitTests(unittest.TestCase): + def setUp(self): + logging.basicConfig(level=logging.DEBUG) + pass + + def testGetUrlsWorks(self): + url = 'http://www.adastrafanfic.com/viewstory.php?sid=426' + self.assertEquals(32, len(Adastrafanfic(url).extractIndividualUrls())) + +if __name__ == '__main__': + unittest.main() diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index c31f1a4b..a7c8f0c4 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -29,6 +29,7 @@ import ficwad import fictionalley import hpfiction import twilighted +import adastrafanfic import potionsNsnitches import mediaminer @@ -113,7 +114,7 @@ class FanficLoader: if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) + logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") argvlen = len(sys.argv) url = None bookFormat = 'epub' @@ -148,6 +149,8 @@ if __name__ == '__main__': adapter = hpfiction.HPFiction(url) elif url.find('twilighted.net') != -1: adapter = twilighted.Twilighted(url) + elif url.find('adastrafanfic.com') != -1: + adapter = adastrafanfic.Adastrafanfic(url) elif url.find('potionsandsnitches.net') != -1: adapter = potionsNsnitches.PotionsNSnitches(url) elif url.find('mediaminer.org') != -1: diff --git a/index.html b/index.html index 1fc32eb4..337e968c 100644 --- a/index.html +++ b/index.html @@ -122,6 +122,9 @@
    http://www.mediaminer.org/fanfic/view_st.php/166653. Or the story URL for one-shots, such as
    http://www.mediaminer.org/fanfic/view_st.php/167618.
+    adastrafanfic.com
+    Use the URL of the story's chapter list, such as
+    http://www.adastrafanfic.com/viewstory.php?sid=854.
diff --git a/main.py b/main.py
index e124982b..50ec30e1 100644
--- a/main.py
+++ b/main.py
@@ -32,6 +32,7 @@
 from fanficdownloader.downloader import *
 from fanficdownloader.ffnet import *
 from fanficdownloader.output import *
 from fanficdownloader import twilighted
+from fanficdownloader import adastrafanfic
 
 from google.appengine.ext import db
@@ -192,6 +193,8 @@ class FanfictionDownloader(webapp.RequestHandler):
             adapter = hpfiction.HPFiction(url)
         elif url.find('twilighted.net') != -1:
             adapter = twilighted.Twilighted(url)
+        elif url.find('adastrafanfic.com') != -1:
+            adapter = adastrafanfic.Adastrafanfic(url)
         elif url.find('potionsandsnitches.net') != -1:
             adapter = potionsNsnitches.PotionsNSnitches(url)
         elif url.find('mediaminer.org') != -1:

From a32ccf7dacef2c7f79c1dc440a08055fdb3ebfe7 Mon Sep 17 00:00:00 2001
From: retiefjimm
Date: Sun, 19 Dec 2010 12:21:55 -0600
Subject: [PATCH 103/482] Work around an SGMLParser/BeautifulStoneSoup entity
 bug--incorrectly inserts (;) when it shouldn't, "AT&T" becomes "AT&T;".
 Also update to latest BeautifulSoup: has one minor, unrelated change.

---
 fanficdownloader/BeautifulSoup.py |  6 +++---
 fanficdownloader/output.py        | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py
index 31ff0e5f..4b17b853 100644
--- a/fanficdownloader/BeautifulSoup.py
+++ b/fanficdownloader/BeautifulSoup.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """Beautiful Soup
 Elixir and Tonic
 "The Screen-Scraper's Friend"
@@ -81,7 +79,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
 from __future__ import generators
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "3.0.8.1"
+__version__ = "3.2.0"
 __copyright__ = "Copyright (c) 2004-2010 Leonard Richardson"
 __license__ = "New-style BSD"
@@ -533,6 +531,8 @@ class Tag(PageElement):
         self.name = name
         if attrs is None:
             attrs = []
+        elif isinstance(attrs, dict):
+            attrs = attrs.items()
         self.attrs = attrs
         self.contents = []
         self.setup(parent, previous)
diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py
index 46b3d7e9..6cbd4fdc 100644
--- a/fanficdownloader/output.py
+++ b/fanficdownloader/output.py
@@ -464,6 +464,12 @@ def replaceNumberEntities(data):
     p = re.compile(r'&#(x?)(\d+);')
     return p.sub(unirepl, data)
 
+def replaceNotEntities(data):
+    # not just \w or \S.  regexp from c:\Python25\lib\sgmllib.py
+    # (or equiv), SGMLParser, entityref
+    p = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);')
+    return p.sub(r'&\1', data)
+
 def removeEntities(text):
 
     # replace numeric versions of [&<>] with named versions.
@@ -492,6 +498,15 @@ def removeEntities(text):
         except UnicodeDecodeError, ex:
             # for the pound symbol in constants.py
             text = text.replace(e, v.decode('utf-8'))
+
+    # SGMLParser, and in turn, BeautifulStoneSoup doesn't parse
+    # entities terribly well and inserts (;) after something that
+    # it thinks might be an entity.  AT&T becomes AT&T; All of my
+    # attempts to fix this by changing the input to
+    # BeautifulStoneSoup break something else instead.  But at
+    # this point, there should be *no* real entities left, so find
+    # these not-entities and removing them here should be safe.
+    text = replaceNotEntities(text)
 
     # &lt; &gt; and &amp; are the only html entities allowed in xhtml, put those back.
     text = text.replace('&', '&amp;').replace('&amp;lt;', '&lt;').replace('&amp;gt;', '&gt;')

From 783a6143450cd62b85bc210a8c519cc0d0d6ee73 Mon Sep 17 00:00:00 2001
From: retiefjimm
Date: Mon, 27 Dec 2010 11:29:12 -0600
Subject: [PATCH 104/482] Enable collection of user's own user/password again.
 Only used by twilighted.net.

---
 css/index.css | 4 ++--
 index.html    | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/css/index.css b/css/index.css
index f4aec452..36c22034 100644
--- a/css/index.css
+++ b/css/index.css
@@ -35,7 +35,7 @@ h1
 }
 
 #logpassword, #logpasswordtable {
-    display: none;
+// display: none;
 }
 
 #urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile
@@ -68,4 +68,4 @@ div.field
 {
     font-size: small;
     color: #f00;
-}
\ No newline at end of file
+}
diff --git a/index.html b/index.html
index 337e968c..2bc40d75 100644
--- a/index.html
+++ b/index.html
@@ -59,9 +59,12 @@

    Login and Password

-
-    If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide
-    your credentials to download it, otherwise just leave it empty
+    If the story requires a login and
+    password to download, you may need
+    to provide your credentials to
+    download it, otherwise just leave
+    it empty.  Currently only needed
+    by twilighted.net.
    Login
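(Editor's sketch: a quick demonstration of the not-entity cleanup from PATCH
103 above. replaceNotEntities() is reproduced verbatim from the output.py
diff; the sample string is illustrative. The substitution is only safe
because it runs after every real entity has already been converted.)

    import re

    def replaceNotEntities(data):
        # entity-shaped runs such as '&T;' lose the trailing
        # semicolon that SGMLParser wrongly inserted
        p = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);')
        return p.sub(r'&\1', data)

    print replaceNotEntities('AT&T; was here')  # -> 'AT&T was here'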
    From fe557bbce53a2e22d8d769e588c0270121b097dc Mon Sep 17 00:00:00 2001 From: sigizmund Date: Mon, 3 Jan 2011 20:35:15 +0000 Subject: [PATCH 105/482] Fixed downloading mobi from fdown --- main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main.py b/main.py index 50ec30e1..8858dcc4 100644 --- a/main.py +++ b/main.py @@ -115,6 +115,10 @@ class FileServer(webapp.RequestHandler): elif fanfic.format == 'text': self.response.headers['Content-Type'] = 'text/plain' self.response.headers['Content-disposition'] = 'attachment; filename=' +name + '.txt.zip' + elif fanfic.format == 'mobi': + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' + self.response.out.write(fanfic.blob) From 95e7ba636da809d18c2a5d69fc3fee1883d1403a Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Sat, 15 Jan 2011 10:41:28 -0600 Subject: [PATCH 106/482] Add commas to word counts. --- fanficdownloader/output.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 6cbd4fdc..de002216 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -368,7 +368,7 @@ class EPubFanficWriter(FanficWriter): self._writeFile(titleFilePath, TITLE_ENTRY % ('Packaged:', createda)) tmpstr = self.adapter.getStoryRating() + " / " + self.adapter.getStoryUserRating() self._writeFile(titleFilePath, TITLE_ENTRY % ('Rating Age/User:', tmpstr)) - tmpstr = unicode(self.adapter.getNumChapters()) + " / " + unicode(self.adapter.getNumWords()) + tmpstr = unicode(self.adapter.getNumChapters()) + " / " + commaGroups(unicode(self.adapter.getNumWords())) self._writeFile(titleFilePath, TITLE_ENTRY % ('Chapters/Words:', tmpstr)) self._writeFile(titleFilePath, TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) self._writeFile(titleFilePath, TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) @@ -515,3 +515,10 @@ def removeEntities(text): def makeAcceptableFilename(text): return re.sub('[^a-zA-Z0-9_-]+','',removeEntities(text).replace(" ", "_").replace(":","_")) + +def commaGroups(s): + groups = [] + while s and s[-1].isdigit(): + groups.append(s[-3:]) + s = s[:-3] + return s + ','.join(reversed(groups)) From 7064873b4e15d288c847973c0709b17e0959457a Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Sun, 23 Jan 2011 13:37:29 -0600 Subject: [PATCH 107/482] Put title, author and publisher into mobi files properly as meta data. Also strips non-ascii chars from t,a,p--caused problems with mobi gen. Calibre's book reader cannot read the mobi files produced from fanficdownloader, either before or after these changes. 
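(Editor's sketch of what the mobi changes amount to; only Converter's new
keyword arguments, ConvertString(), and the encode() idiom come from the
diff below -- the variable names are placeholders.)

    # non-ASCII characters are silently dropped from the
    # title/author/publisher metadata fields
    print u'Caf\xe9 Stories'.encode('ascii', 'ignore')  # -> 'Caf Stories'

    # callers now pass the metadata in at construction time
    c = mobi.Converter(title=storyTitle, author=authorName, publisher=host)
    mobidata = c.ConvertString(storyHtml)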
--- fanficdownloader/mobi.py | 19 +++++++++++-------- fanficdownloader/output.py | 3 ++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py index 4facb556..cd9502e0 100644 --- a/fanficdownloader/mobi.py +++ b/fanficdownloader/mobi.py @@ -47,8 +47,11 @@ class _SubEntry: return self.html.RenameAnchors(self._name + '_') class Converter: - def __init__(self, refresh_url=''): + def __init__(self, refresh_url='', title='Unknown', author='Unknown', publisher='Unknown'): self._header = Header() + self._header.SetTitle(title) + self._header.SetAuthor(author) + self._header.SetPublisher(publisher) self._refresh_url = refresh_url def ConvertString(self, s): @@ -114,9 +117,9 @@ class Converter: html = HtmlProcessor(html_data) data = html.CleanHtml() records = [] - title = html.title - if title: - self._header.SetTitle(title) +# title = html.title +# if title: +# self._header.SetTitle(title) record_id = 1 for start_pos in range(0, len(data), Record.MAX_SIZE): end = min(len(data), start_pos + Record.MAX_SIZE) @@ -190,16 +193,16 @@ class Header: self._first_image_index = 0 def SetAuthor(self, author): - self._author = author + self._author = author.encode('ascii','ignore') def SetTitle(self, title): # TODO(chatham): Reevaluate whether this needs to be ASCII. # maybe just do sys.setdefaultencoding('utf-8')? Problems # appending self._title with other things. - self._title = title.encode('ascii') + self._title = title.encode('ascii','ignore') def SetPublisher(self, publisher): - self._publisher = publisher + self._publisher = publisher.encode('ascii','ignore') def AddRecord(self, data, record_id): self.max_record_size = max(Record.MAX_SIZE, len(data)) @@ -341,4 +344,4 @@ class Header: if __name__ == '__main__': import sys m = Converter() - m.ConvertFiles(sys.argv[1:], '/tmp/test.mobi') \ No newline at end of file + m.ConvertFiles(sys.argv[1:], '/tmp/test.mobi') diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index de002216..63c1168c 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -99,6 +99,7 @@ class MobiWriter(FanficWriter): self.name = makeAcceptableFilename(adapter.getOutputName()) self.fileName = self.basePath + '/' + self.name + self.getFormatExt() self.authorName = removeEntities(adapter.getAuthorName()) + self.publisher = adapter.getPublisher() self.adapter = adapter self.mobi = mobi self.inmemory = inmemory @@ -136,7 +137,7 @@ class MobiWriter(FanficWriter): # f.write(result) # f.close() - c = mobi.Converter() + c = mobi.Converter(title=self.storyTitle, author=self.authorName, publisher=self.publisher) mobidata = c.ConvertString(result) self.output.write(mobidata) From 790aca39dad2caf10838275a58029d38d0039f43 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 24 Feb 2011 18:02:49 -0600 Subject: [PATCH 108/482] Add epubmerge.py, a standalone CLI program to merge multiple epubs together into one. --- fanficdownloader/epubmerge.py | 293 ++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 fanficdownloader/epubmerge.py diff --git a/fanficdownloader/epubmerge.py b/fanficdownloader/epubmerge.py new file mode 100644 index 00000000..6d35be6a --- /dev/null +++ b/fanficdownloader/epubmerge.py @@ -0,0 +1,293 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import getopt +import os + +import zlib +import zipfile +from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED +from time import time + +from xml.dom.minidom import parse, parseString, getDOMImplementation + +def usage(): + print "epubmerge 1.0 Merges multiple epub format ebooks together" + print "\nUsage: " + sys.argv[0]+" [options] [ ...]\n" + print " Options:" + print " -h --help" + print " -o --output= Default: merge.epub" + print " -t --title= Default: ' Anthology'" + print " -a --author= Default: " + print " Multiple authors may be given." + +def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], "t:a:o:h", ["title=","author=", "output=","help"]) + except getopt.GetoptError, err: + # print help information and exit: + print str(err) # will print something like "option -a not recognized" + usage() + sys.exit(2) + + if( len(args) < 1 ): + usage() + sys.exit() + + outputopt = "merge.epub" + titleopt = None + authoropts = [] # list of strings + + for o, a in opts: + if o in ("-h", "--help"): + usage() + sys.exit() + elif o in ("-t", "--title"): + titleopt = a + elif o in ("-a", "--author"): + authoropts.append(a) + elif o in ("-o", "--output"): + outputopt = a + else: + assert False, "unhandled option" + + ## Add .epub if not already there. + if( not outputopt.lower().endswith(".epub") ): + outputopt=outputopt+".epub" + + print "output file: "+outputopt + + ## Write mimetype file, must be first and uncompressed. + ## Older versions of python(2.4/5) don't allow you to specify + ## compression by individual file. + ## Overwrite if existing output file. + outputepub = ZipFile(outputopt, "w", compression=ZIP_STORED) + outputepub.debug = 3 + outputepub.writestr("mimetype", "application/epub+zip") + outputepub.close() + + ## Re-open file for content. + outputepub = ZipFile(outputopt, "a", compression=ZIP_DEFLATED) + outputepub.debug = 3 + + ## Create META-INF/container.xml file. The only thing it does is + ## point to content.opf + containerdom = getDOMImplementation().createDocument(None, "container", None) + containertop = containerdom.documentElement + containertop.setAttribute("version","1.0") + containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container") + rootfiles = containerdom.createElement("rootfiles") + containertop.appendChild(rootfiles) + rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf", + "media-type":"application/oebps-package+xml"})) + outputepub.writestr("META-INF/container.xml",containerdom.toprettyxml(indent=' ',encoding='utf-8')) + + ## Process input epubs. + + items = [] # list of (id, href, type) tuples(all strings) -- From .opfs' manifests + items.append(("ncx","toc.ncx","application/x-dtbncx+xml")) ## we'll generate the toc.ncx file, + ## but it needs to be in the items manifest. + itemrefs = [] # list of strings -- idrefs from .opfs' spines + navmaps = [] # list of navMap DOM elements -- TOC data for each from toc.ncx files + + booktitles = [] # list of strings -- Each book's title + allauthors = [] # list of lists of strings -- Each book's list of authors. 
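+    ## (Editorial comment, not in the original patch:) each input epub is
+    ## copied into its own numbered subdirectory ("1/", "2/", ...) of the
+    ## merged book, and manifest ids get an "a<booknum>" prefix, so ids
+    ## and hrefs from different inputs cannot collide.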
+ + booknum=1 + for filename in args: + print "input file: "+filename + book = "%d" % booknum + + epub = ZipFile(filename, 'r') + + ## Find the .opf file. + container = epub.read("META-INF/container.xml") + containerdom = parseString(container) + rootfilenodelist = containerdom.getElementsByTagName("rootfile") + rootfilename = rootfilenodelist[0].getAttribute("full-path") + + ## Save the path to the .opf file--hrefs inside it are relative to it. + relpath = os.path.dirname(rootfilename) + if( len(relpath) > 0 ): + relpath=relpath+"/" + + metadom = parseString(epub.read(rootfilename)) + + ## Save indiv book title + booktitles.append(metadom.getElementsByTagName("dc:title")[0].firstChild.data) + + ## Save authors. + authors=[] + for creator in metadom.getElementsByTagName("dc:creator"): + if( creator.getAttribute("opf:role") == "aut" ): + authors.append(creator.firstChild.data) + allauthors.append(authors) + + for item in metadom.getElementsByTagName("item"): + if( item.getAttribute("media-type") == "application/x-dtbncx+xml" ): + # TOC file is only one with this type--as far as I know. + # grab the whole navmap, deal with it later. + tocdom = parseString(epub.read(relpath+item.getAttribute("href"))) + + for navpoint in tocdom.getElementsByTagName("navPoint"): + navpoint.setAttribute("id","a"+book+navpoint.getAttribute("id")) + + for content in tocdom.getElementsByTagName("content"): + content.setAttribute("src",book+"/"+relpath+content.getAttribute("src")) + + navmaps.append(tocdom.getElementsByTagName("navMap")[0]) + else: + id="a"+book+item.getAttribute("id") + href=book+"/"+relpath+item.getAttribute("href") + href=href.encode('utf8') + items.append((id,href,item.getAttribute("media-type"))) + outputepub.writestr(href, + epub.read(relpath+item.getAttribute("href"))) + + for itemref in metadom.getElementsByTagName("itemref"): + itemrefs.append("a"+book+itemref.getAttribute("idref")) + + booknum=booknum+1; + + ## create content.opf file. + uniqueid="epubmerge-uid-%d" % time() # real sophisticated uid scheme. + contentdom = getDOMImplementation().createDocument(None, "package", None) + package = contentdom.documentElement + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","epubmerge-id") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + package.appendChild(metadata) + metadata.appendChild(newTag(contentdom,"dc:identifier",text=uniqueid,attrs={"id":"epubmerge-id"})) + if( titleopt is None ): + titleopt = booktitles[0]+" Anthology" + metadata.appendChild(newTag(contentdom,"dc:title",text=titleopt)) + + # If cmdline authors, use those instead of those collected from the epubs + # (allauthors kept for TOC & description gen below. + if( len(authoropts) > 1 ): + useauthors=[authoropts] + else: + useauthors=allauthors + + usedauthors=dict() + for authorlist in useauthors: + for author in authorlist: + if( not usedauthors.has_key(author) ): + usedauthors[author]=author + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=author)) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="epubmerge",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="Copyrights as per source stories")) + metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + + # created now, but not filled in until TOC generation to save loops. 
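+    ## (Editorial comment, not in the original patch:) each book's
+    ## "title by author" line is appended to this dc:description while
+    ## the TOC navMap is built, so the anthology's description ends up
+    ## listing its contents.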
+ description = newTag(contentdom,"dc:description",text="Anthology containing:\n") + metadata.appendChild(description) + + manifest = contentdom.createElement("manifest") + package.appendChild(manifest) + for item in items: + (id,href,type)=item + manifest.appendChild(newTag(contentdom,"item", + attrs={'id':id, + 'href':href, + 'media-type':type})) + + spine = newTag(contentdom,"spine",attrs={"toc":"ncx"}) + package.appendChild(spine) + for itemref in itemrefs: + spine.appendChild(newTag(contentdom,"itemref", + attrs={"idref":itemref, + "linear":"yes"})) + + ## create toc.ncx file + tocncxdom = getDOMImplementation().createDocument(None, "ncx", None) + ncx = tocncxdom.documentElement + ncx.setAttribute("version","2005-1") + ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/") + head = tocncxdom.createElement("head") + ncx.appendChild(head) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:uid", "content":uniqueid})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:depth", "content":"1"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:totalPageCount", "content":"0"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:maxPageNumber", "content":"0"})) + + docTitle = tocncxdom.createElement("docTitle") + docTitle.appendChild(newTag(tocncxdom,"text",text=titleopt)) + ncx.appendChild(docTitle) + + tocnavMap = tocncxdom.createElement("navMap") + ncx.appendChild(tocnavMap) + + ## TOC navPoints can ge nested, but this flattens them for + ## simplicity, plus adds a navPoint for each epub. + booknum=0 + for navmap in navmaps: + navpoints = navmap.getElementsByTagName("navPoint") + ## Copy first navPoint of each epub, give a different id and + ## text: bookname by authorname + newnav = navpoints[0].cloneNode(True) + newnav.setAttribute("id","book"+newnav.getAttribute("id")) + ## For purposes of TOC titling & desc, use first book author + newtext = newTag(tocncxdom,"text",text=booktitles[booknum]+" by "+allauthors[booknum][0]) + description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n")) + text = newnav.getElementsByTagName("text")[0] + text.parentNode.replaceChild(newtext,text) + tocnavMap.appendChild(newnav) + + for navpoint in navpoints: + tocnavMap.appendChild(navpoint) + booknum=booknum+1; + + ## Force strict ordering of playOrder + playorder=1 + for navpoint in tocncxdom.getElementsByTagName("navPoint"): + navpoint.setAttribute("playOrder","%d" % playorder) + if( not navpoint.getAttribute("id").startswith("book") ): + playorder = playorder + 1 + + ## content.opf written now due to description being filled in + ## during TOC generation to save loops. + outputepub.writestr("content.opf",contentdom.toxml('utf-8')) + outputepub.writestr("toc.ncx",tocncxdom.toxml('utf-8')) + + outputepub.close() + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + +if __name__ == "__main__": + main() From 0e9d992e1310878738c909e6d4f882c95c2b47e7 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Fri, 4 Mar 2011 11:58:53 -0600 Subject: [PATCH 109/482] Make Branch for conversion to Queue Processing. 
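(Editor's sketch: the branch point above only sets up scaffolding --
queue.yaml plus the /fdownloadtask handler registered in app.yaml below.
A minimal sketch of the intended hand-off, assuming App Engine's task
queue API; the parameter names are placeholders, not taken from the patch.)

    from google.appengine.api import taskqueue

    # hand the slow download off to a background task so the
    # user-facing request can return immediately
    taskqueue.add(url='/fdownloadtask',
                  params={'url': story_url, 'format': 'epub'})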
--- app.yaml | 29 + cron.yaml | 4 + css/index.css | 71 + delete_fic.py | 59 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapter.py | 229 + fanficdownloader/adastrafanfic.py | 223 + fanficdownloader/books/place holder.txt | 0 fanficdownloader/constants.py | 542 ++ fanficdownloader/downloader.py | 210 + fanficdownloader/epubmerge.py | 293 ++ fanficdownloader/ffnet.py | 359 ++ fanficdownloader/fictionalley.py | 301 ++ fanficdownloader/ficwad.py | 256 + fanficdownloader/fpcom.py | 301 ++ fanficdownloader/hpfiction.py | 280 ++ fanficdownloader/html.py | 121 + fanficdownloader/html2text.py | 452 ++ fanficdownloader/html_constants.py | 19 + fanficdownloader/mediaminer.py | 366 ++ fanficdownloader/mobi.py | 347 ++ fanficdownloader/output.py | 525 ++ fanficdownloader/potionsNsnitches.py | 367 ++ fanficdownloader/readme.txt | 10 + fanficdownloader/twilighted.py | 316 ++ fanficdownloader/twipassword.py | 4 + fanficdownloader/zipdir.py | 177 + ffstorage.py | 21 + index-ajax.html | 109 + index.html | 212 + index.yaml | 22 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ main.py | 366 ++ queue.yaml | 5 + recent.html | 69 + simplejson/__init__.py | 318 ++ simplejson/__init__.pyc | Bin 0 -> 12071 bytes simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/decoder.pyc | Bin 0 -> 11292 bytes simplejson/encoder.py | 440 ++ simplejson/encoder.pyc | Bin 0 -> 13938 bytes simplejson/scanner.py | 65 + simplejson/scanner.pyc | Bin 0 -> 2340 bytes simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes utils/remover.py | 53 + 66 files changed, 17437 insertions(+) create mode 100644 app.yaml create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 delete_fic.py create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapter.py create mode 100644 fanficdownloader/adastrafanfic.py create mode 100644 fanficdownloader/books/place holder.txt create mode 100644 fanficdownloader/constants.py create mode 100644 fanficdownloader/downloader.py create mode 100644 fanficdownloader/epubmerge.py create mode 100644 fanficdownloader/ffnet.py create mode 100644 fanficdownloader/fictionalley.py create mode 100644 fanficdownloader/ficwad.py create mode 100644 fanficdownloader/fpcom.py create mode 100644 fanficdownloader/hpfiction.py create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/html2text.py create mode 100644 fanficdownloader/html_constants.py create mode 100644 fanficdownloader/mediaminer.py create mode 100644 fanficdownloader/mobi.py create mode 100644 fanficdownloader/output.py create mode 100644 fanficdownloader/potionsNsnitches.py create mode 100644 fanficdownloader/readme.txt create mode 100644 
fanficdownloader/twilighted.py create mode 100644 fanficdownloader/twipassword.py create mode 100644 fanficdownloader/zipdir.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 main.py create mode 100644 queue.yaml create mode 100644 recent.html create mode 100644 simplejson/__init__.py create mode 100644 simplejson/__init__.pyc create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/decoder.pyc create mode 100644 simplejson/encoder.py create mode 100644 simplejson/encoder.pyc create mode 100644 simplejson/scanner.py create mode 100644 simplejson/scanner.pyc create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 utils/remover.py diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..9c55df49 --- /dev/null +++ b/app.yaml @@ -0,0 +1,29 @@ +application: fanfictionloader +version: 2-6-beta +runtime: python +api_version: 1 + +handlers: +- url: /r3m0v3r + script: utils/remover.py + login: admin + +- url: /r3m0v3r + script: main.py + login: admin + +- url: /fdownloadtask + script: main.py + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /.* + script: main.py diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..1d9c70a0 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 3 hours \ No newline at end of file diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..36c22034 --- /dev/null +++ b/css/index.css @@ -0,0 +1,71 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 43%; + margin-left: 23%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ + margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { +// display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + font-size: small; + color: #f00; 
+} diff --git a/delete_fic.py b/delete_fic.py new file mode 100644 index 00000000..73722724 --- /dev/null +++ b/delete_fic.py @@ -0,0 +1,59 @@ +import os +import cgi +import sys +import logging +import traceback +import StringIO + +from google.appengine.api import users +from google.appengine.ext import webapp +from google.appengine.ext.webapp import util + +from fanficdownloader.downaloder import * +from fanficdownloader.ffnet import * +from fanficdownloader.output import * + +from google.appengine.ext import db + +from fanficdownloader.zipdir import * + +from ffstorage import * + +def create_mac(user, fic_id, fic_url): + return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url))) + +def check_mac(user, fic_id, fic_url, mac): + return (create_mac(user, fic_id, fic_url) == mac) + +def create_mac_for_fic(user, fic_id): + key = db.Key(fic_id) + fanfic = db.get(key) + if fanfic.user != user: + return None + else: + return create_mac(user, key, fanfic.url) + +class DeleteFicHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect('/login') + + fic_id = self.request.get('fic_id') + fic_mac = self.request.get('key_id') + + actual_mac = create_mac_for_fic(user, fic_id) + if actual_mac != fic_mac: + self.response.out.write("Ooops") + else: + key = db.Key(fic_id) + fanfic = db.get(key) + fanfic.delete() + self.redirect('/recent') + + + fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user) + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + \ No newline at end of file diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..4b17b853 --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. 
+ +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. 
+ index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+ break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+ elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. 
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
    (No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. 
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

+         <p>Foo<b>Bar *<p> should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p> should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p> should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li> should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr> should pop to 'table', not the first 'tr'
+         <td><tr><td>
    ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." 
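+        # e.g. (illustrative) ref "65" yields u"A" when convertEntities is set;
+        # otherwise the reference is re-emitted literally as "&#65;".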
+ if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

+      <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurance
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+        but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table></tr><tr>
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+ # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + + + + + + + + +
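For orientation, here is a minimal usage sketch of the parser API defined in the file above. It is a sketch only: the markup string and variable names are illustrative, and it assumes the module is importable as BeautifulSoup.

    from BeautifulSoup import BeautifulSoup, SoupStrainer

    html = "<html><p class='story'>Foo<b>bar &amp; baz</b><p>Quux</html>"

    # Lenient parse; convert HTML entities to Unicode while parsing.
    soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)

    # The unclosed <p> is closed for us by the nesting rules described above.
    paragraphs = soup.findAll('p')        # two <p> Tag objects
    bold_text = soup.find('b').string     # u'bar & baz'

    # A SoupStrainer keeps only matching elements at parse time.
    only_story = SoupStrainer('p', {'class': 'story'})
    story = BeautifulSoup(html, parseOnlyThese=only_story)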
    +

    + FanFiction Downloader +

    + + +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
+ If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it; otherwise just leave it empty.
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
+ A few things to know, which will make your life substantially easier:
+ 1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+ 3. Paste a URL of the first chapter of the fanfic, not the index page.
+ 4. Fics with a single chapter are not supported (you can just copy and paste the text).
+ 5. Stories which are too long may not be downloaded correctly and the application will report a time-out error — this is a limitation currently imposed by Google AppEngine on long-running activities.
+ 6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
+ 7. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in future they will be deleted after 5 days to save space.
+ 8. If Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
+ 9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
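Both this template and the index.html added below are rendered server-side with the App Engine webapp template module, following the same render pattern used in delete_fic.py earlier in this patch. A minimal sketch — the handler class name and the sample values are illustrative:

    import os
    from google.appengine.ext import webapp
    from google.appengine.ext.webapp import template

    class IndexHandler(webapp.RequestHandler):
        def get(self):
            # Values here fill slots such as {{ nickname }} in the template.
            template_values = dict(nickname='reader', authorized=True)
            path = os.path.join(os.path.dirname(__file__), 'index.html')
            self.response.out.write(template.render(path, template_values))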
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..2bc40d75 --- /dev/null +++ b/index.html @@ -0,0 +1,212 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    +

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

    +

For Amazon Kindle, use Mobi output; for Sony Reader, Nook and iPad, use ePub.

    +

    To support new features, such as including story summaries, + the URL you need to use for some sites has changed. See below for example URLs for each site.

    +

    Or see your personal list of previously downloaded fanfics.

    +
    +
    + {{ error_message }} +
    + +
    + +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text + Mobi (Kindle) +
    +
    + +
    +

    Login and Password

    +
+ If the story requires a login and password to download, you may need to provide your credentials; otherwise just leave it empty. Currently this is only needed by twilighted.net.
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    + +
    + +
    + + {% else %} +
    +
    +

+ This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them.

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    +
    +
    fictionalley.org +
    Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    fanfiction.net +
    Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/2345466/3/. +
    fictionpress.com +
    Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    twilighted.net +
    Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    ficwad.com +
    Use the URL of any story chapter, such as +
    http://www.ficwad.com/story/75246. +
    harrypotterfanfiction.com +
    Use the URL of the story's chapter list, such as +
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
    potionsandsnitches.net +
    Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    mediaminer.org +
    Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
    adastrafanfic.com +
    Use the URL of the story's chapter list, such as +
    http://www.adastrafanfic.com/viewstory.php?sid=854. +
    + + + A few additional things to know, which will make your life substantially easier: +
      +
+ 1. First thing to know: I do not use your login and password. In fact, all I know is your ID – the password is verified by Google and is absolutely, totally unknown to anyone but you.
+ 2. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 3. Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org and twilighted.net. fanficauthors.net and tthfanfic.org offer native ePub functionality.
+ 4. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
+ 5. One-shots, fics with a single chapter, are now supported.
+ 6. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section.
+ 7. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep Google happy about the app not going over the storage limit).
+ 8. If Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
+ 9. If you see some funny characters in a downloaded Plain Text file, make sure you choose the UTF-8 text encoding and not something else.
+ 10. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com, or, even better, write to our Google Group. I also encourage you to join it so you will find out about the latest updates and fixes as soon as possible.
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + +
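The GqlQuery in delete_fic.py earlier in this patch and the composite indexes in the index.yaml added below together imply a datastore model along these lines. A minimal sketch — only the property names user, url, date and cleared are attested in this patch; the property types are assumptions:

    from google.appengine.ext import db

    class DownloadedFanfic(db.Model):
        # user, url, date and cleared appear in delete_fic.py and index.yaml;
        # the types chosen here are assumptions.
        user = db.UserProperty()
        url = db.StringProperty()
        date = db.DateTimeProperty(auto_now_add=True)
        cleared = db.BooleanProperty(default=False)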
    + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..2b67374d --- /dev/null +++ b/index.yaml @@ -0,0 +1,22 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+ all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "
    ", "
    " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + + + + + {{yourfile}} + + +

    +
    + Hi, {{ nickname }}! These are the fanfics you've downloaded previously. +
    +
    + +
    + {% for fic in fics %} +

    {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
    {{ fic.url }}

    + {% endfor %} +
    + + + + + +
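For context, here is a sketch of the request handler that could feed the recent-downloads template above. The handler and file names are assumptions; the query matches the (user, date desc) index from index.yaml, and DownloadedFanfic is the model sketched alongside it:

import os

from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp import template

class RecentHandler(webapp.RequestHandler):
    # Lists the signed-in user's previously downloaded fics, newest first.
    def get(self):
        user = users.get_current_user()
        fics = (DownloadedFanfic.all()
                .filter('user =', user)
                .order('-date')
                .fetch(50))
        path = os.path.join(os.path.dirname(__file__), 'recent.html')
        self.response.out.write(template.render(path, {
            'nickname': user.nickname(),
            'fics': fics,
        }))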
    + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
+ >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? 
zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *skipkeys; + int fast_encode; + int allow_nan; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {NULL} +}; + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +void init_speedups(void); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); +static int +encoder_clear(PyObject *self); +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *const); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()); + return 1; + return 0; +} 
+ +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + 
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if 
(pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 
5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. 
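+    For example, scanning u'"abc"' with end == 1 consumes the body and the
+    closing quote, returning u'abc' with *next_end_ptr set to 5.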
+ if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 
10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. 
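+    Keys and string values are scanned with scanstring_str, using
+    s->encoding for any non-ASCII text.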
+ idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
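+    For example, parsing u'{"a": 1}' with idx == 1 (just past the '{')
+    consumes through the closing brace and sets *next_idx_ptr to 8.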
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. 
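+    For example, parsing '[1, 2]' with idx == 1 yields the list [1, 2] and
+    sets *next_idx_ptr to 6 (one past the ']').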
+
+    Returns a new PyList
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term */
+            val = scan_once_str(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON array from PyUnicode pystr.
+    idx is the index of the first character after the opening brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the closing brace.
+
+    Returns a new PyList
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Handle a JSON constant that was detected in the input.
+    constant is the constant string that was found
+    ("NaN", "Infinity", "-Infinity").
+    idx is the index of the first character of the constant
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the constant.
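+    For example, given "NaN" and the default parse_constant, the result is
+    the NaN float and *next_idx_ptr is set to idx + 3.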
+ + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyUnicode pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. 
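+
+    Dispatches on the first character: '"' begins a string, '{' an object,
+    '[' an array; 'n', 't', 'f', 'N', 'I' and '-' are matched against the
+    literals null, true, false, NaN, Infinity and -Infinity; anything that
+    does not match falls through to the number matcher.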
+ + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 
0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON 
representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + 
empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto 
bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, 
NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
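+    For example, py_scanstring('"foo"', 1) returns (u'foo', 5).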
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not isinstance(content, unicode):
+                content = unicode(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at" % (terminator,)
+                #msg = "Invalid control character {0!r} at".format(terminator)
+                raise ValueError(errmsg(msg, s, end))
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\escape: " + repr(esc)
+                raise ValueError(errmsg(msg, s, end))
+            end += 1
+        else:
+            # Unicode escape sequence
+            esc = s[end + 1:end + 5]
+            next_end = end + 5
+            if len(esc) != 4:
+                msg = "Invalid \\uXXXX escape"
+                raise ValueError(errmsg(msg, s, end))
+            uni = int(esc, 16)
+            # Check for surrogate pair on UCS-4 systems
+            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                if not s[end + 5:end + 7] == '\\u':
+                    raise ValueError(errmsg(msg, s, end))
+                esc2 = s[end + 7:end + 11]
+                if len(esc2) != 4:
+                    raise ValueError(errmsg(msg, s, end))
+                uni2 = int(esc2, 16)
+                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                next_end += 6
+            char = unichr(uni)
+            end = next_end
+        # Append the unescaped character
+        _append(char)
+    return u''.join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    pairs = {}
+    # Use a slice to prevent an IndexError from being raised; the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
+        if s[end:end + 1] != ':':
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise ValueError(errmsg("Expecting : delimiter", s, end))
+
+        end += 1
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise ValueError(errmsg("Expecting object", s, end))
+        pairs[key] = value
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+
+        if nextchar == '}':
+            break
+        elif nextchar != ',':
+            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+
+        end += 1
+        if nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end - 1))
+
+    if object_hook is not None:
+        pairs = object_hook(pairs)
+    return pairs, end
+
+def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    values = []
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
+    if nextchar == ']':
+        return values, end + 1
+    _append = values.append
+    while True:
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise ValueError(errmsg("Expecting object", s, end))
+        _append(value)
+        nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
+        end += 1
+        if nextchar == ']':
+            break
+        elif nextchar != ',':
+            raise ValueError(errmsg("Expecting , delimiter", s, end))
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+    return values, end
+
+class JSONDecoder(object):
+    """Simple JSON decoder
+
+    Performs the following translations in decoding by default:
+
+    +---------------+-------------------+
+    | JSON          | Python            |
+    +===============+===================+
+    | object        | dict              |
+    +---------------+-------------------+
+    | array         | list              |
+    +---------------+-------------------+
+    | string        | unicode           |
+    +---------------+-------------------+
+    | number (int)  | int, long         |
+    +---------------+-------------------+
+    | number (real) | float             |
+    +---------------+-------------------+
+    | true          | True              |
+    +---------------+-------------------+
+    | false         | False             |
+    +---------------+-------------------+
+    | null          | None              |
+    +---------------+-------------------+
+
+    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+    their corresponding ``float`` values, which is outside the JSON spec.
+
+    """
+
+    def __init__(self, encoding=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, strict=True):
+        """``encoding`` determines the encoding used to interpret any ``str``
+        objects decoded by this instance (utf-8 by default). It has no
+        effect when decoding ``unicode`` objects.
+
+        Note that currently only encodings that are a superset of ASCII work;
+        strings of other encodings should be passed in as ``unicode``.
+
+        ``object_hook``, if specified, will be called with the result
+        of every JSON object decoded and its return value will be used in
+        place of the given ``dict``. This can be used to provide custom
+        deserializations (e.g. to support JSON-RPC class hinting).
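+
+        (For example, ``object_hook=lambda d: complex(d['real'], d['imag'])``
+        would turn ``{"real": 1.0, "imag": 2.0}`` into ``(1+2j)``.)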
+ + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc GIT binary patch literal 11292 zcmcIq&2tmkcE2sjACi&37_iM}!weOf0h^grg(*VjJp*RO4@U6XFwFBak=p8(-B{|D zyIWwr$eTbG*{8C4o2;@-Ro*I9S){Vde~_xAD%-5H%x3cYo!i}#{XArtI09eY&vVZ` z=iKvk^}GKnmp=LJZ`T?s`IPbfbNogBXei|K(wGQ9BBIomM%g zsO}crSyW1_%%C!%?rxlw9p;pSt)|pl;4v>toH8a%Vx}{N$}}rnGRq2~eZplA;DT$P?H3UqU?swTMdF{uF3{A%N4Zo6Ljgj~FYVy)e2 zxoy{v?br)^GiaK>SbtJ|;5Gus4NKnl_*5(4wu(=oXxqDPEo#`l?}oa!L_p|>;?VQA z;|Vl}0Epz|(Km*_`<&WE0Xgwd=G0q59RM<#;7(q>%B$d(GWr-RC=>F<|0C)z@@hf% zT|U$|hqb<;j5x2n0)+sRz^%=kIsn|9>Km$$GN&e*c^sK2RAUq6?S~J)xHa@ad#H8m z762->rtLSq#v6BQIr7NLS5e?EYplYhPA{GUj9Rl5*l{BOldw-TgRY2tQ^u4rZQL^E zjRwzEz#k7Ld|F2Z^dz6;R)d&tag+0Ej+E8x%Bi3Pc7fP=<)4!pmtg+Tl6ef04SCh1 z;Bk{`M1d*+w#>$HxqZyN8$s9C)PguU?!L?y(l|jwzB@rA)N6UZ>j%lAmLHrj-6$>G zq7k&tqnglb!6+N0+=9{IG01F6hR|ytl^7%wnM=2d3=!+kmbMrmx%$r;+T(Zvcpu)w#cPpfUu%#VsTeE0br_&zScdU=tb0+O+ zwqv;=7)iKC@}UJxPn?0goo*NcuTM~mCQ+!|8KayZg}k3Z84>rJYq&nX7x!YKk~Mfo zt9CUa>XoJIKU=K)`s(K9=Kif)H#he`Yrb6UQi88SbD@;_rpWZB)R(1yS?c3b-;(+% zsW+uwlDaSTveZMV7o{FYybRsAdGm_~4??0+wTh_y9N6&&&Wlvx24V>=B_(KECBg!Q zdH^wzt2hrS+W+qyg-rwu(s@u+{bCB?f~ZjN6r#o%JqYVKq5le1X(r{!7;2OlVKOq_)Dg+L}Q~!zXDTobN zK?9~>5%&c+Ptby$Jvc=<1Csqy=`m6y_pTU5AHXMww6XVxvRiL5nYM6HRxkWdRRN+$ z@8Ys}7aG>Vf$vh2n2s90sky`LJ>5635^Tj)KDu zsJ0|4q_Et$8~T`Q#$EU$38fNToaEqg1TG(@=3pnGq(B=`6J|nFbO{ayct>mZ+y}aC z&$mh2lz${OTu~6hGC7g6H)Z--LIrq2hfii+1IJ_0yj^jR(wUo@_IFHZK=>*&dtwu^ z@$Pr{BpmXlIY<2~itqo8#rK%~G5QYA-qdTNF1WT0%*~Ds=Z^kHL^o)&>rvCgIf7eE zFU?c`j)GQzzO$-U#LUj28gs)3s}EP6KYmtQt=@m~V6FOmr&K 
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
new file mode 100644
--- /dev/null
+++ b/simplejson/encoder.py
[... beginning of simplejson/encoder.py lost in source; the text resumes inside the surrogate-pair branch of py_encode_basestring_ascii() ...]
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                s2 = 0xdc00 | (n & 0x3ff)
+                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
+
+class JSONEncoder(object):
+    """Extensible JSON encoder for Python data structures.
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible; otherwise it should call the superclass implementation
+    (to raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None. If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped. If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such. This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level. An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be a (item_separator, key_separator)
+        tuple. The default is (', ', ': '). To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized. It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+ + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. 
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
+            elif isinstance(key, float):
+                key = _floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif _skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield _encoder(key)
+            yield _key_separator
+            if isinstance(value, basestring):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, (int, long)):
+                yield str(value)
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            else:
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode(o, _current_indent_level):
+        if isinstance(o, basestring):
+            yield _encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, (int, long)):
+            yield str(o)
+        elif isinstance(o, float):
+            yield _floatstr(o)
+        elif isinstance(o, (list, tuple)):
+            for chunk in _iterencode_list(o, _current_indent_level):
+                yield chunk
+        elif isinstance(o, dict):
+            for chunk in _iterencode_dict(o, _current_indent_level):
+                yield chunk
+        else:
+            if markers is not None:
+                markerid = id(o)
+                if markerid in markers:
+                    raise ValueError("Circular reference detected")
+                markers[markerid] = o
+            o = _default(o)
+            for chunk in _iterencode(o, _current_indent_level):
+                yield chunk
+            if markers is not None:
+                del markers[markerid]
+
+    return _iterencode
diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b
GIT binary patch
literal 13938
[... base85-encoded binary data elided (compiled simplejson/encoder.pyc) ...]

literal 0
HcmV?d00001
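The _make_iterencode closure above is the pure-Python fallback behind JSONEncoder.iterencode. A short sketch of the two documented extension points -- subclassing default() and consuming iterencode() incrementally; illustrative only, not part of the patch:

    # Illustrative only -- not part of the patch.
    import simplejson

    class SetEncoder(simplejson.JSONEncoder):
        # default() is consulted only for objects the encoder cannot already
        # serialize; anything else falls through to the base implementation,
        # which raises TypeError.
        def default(self, o):
            if isinstance(o, set):
                return sorted(o)
            return simplejson.JSONEncoder.default(self, o)

    assert SetEncoder().encode({'tags': set(['b', 'a'])}) == '{"tags": ["a", "b"]}'

    # iterencode() yields the document in chunks, so a large structure can be
    # streamed to a socket or CGI response without building one big string.
    out = []
    for chunk in simplejson.JSONEncoder(indent=2).iterencode(['spam']):
        out.append(chunk)
    assert ''.join(out) == '[\n  "spam"\n]'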
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
new file mode 100644
index 00000000..adbc6ec9
--- /dev/null
+++ b/simplejson/scanner.py
@@ -0,0 +1,65 @@
+"""JSON token scanner
+"""
+import re
+try:
+    from simplejson._speedups import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None
+
+__all__ = ['make_scanner']
+
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
+
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
+
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
+
+    return _scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef
GIT binary patch
literal 2340
[... base85-encoded binary data elided (compiled simplejson/scanner.pyc) ...]

literal 0
HcmV?d00001
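_scan_once dispatches on the first character of the next token, so each value costs one closure call with no per-token regex except for numbers. Because JSONDecoder.raw_decode exposes the same machinery, several documents can be pulled out of one buffer -- a sketch, illustrative only and not part of the patch:

    # Illustrative only -- not part of the patch.
    import simplejson

    def iter_documents(s, _decoder=simplejson.JSONDecoder()):
        # raw_decode() does not skip leading whitespace, so eat it by hand
        # between documents.
        idx, n = 0, len(s)
        while idx < n:
            while idx < n and s[idx] in ' \t\n\r':
                idx += 1
            if idx == n:
                break
            obj, idx = _decoder.raw_decode(s, idx)
            yield obj

    assert list(iter_documents('{"a": 1} [2, 3] null')) == [{'a': 1}, [2, 3], None]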
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
new file mode 100644
index 00000000..17c97963
--- /dev/null
+++ b/simplejson/tests/__init__.py
@@ -0,0 +1,23 @@
+import unittest
+import doctest
+
+def additional_tests():
+    import simplejson
+    import simplejson.encoder
+    import simplejson.decoder
+    suite = unittest.TestSuite()
+    for mod in (simplejson, simplejson.encoder, simplejson.decoder):
+        suite.addTest(doctest.DocTestSuite(mod))
+    suite.addTest(doctest.DocFileSuite('../../index.rst'))
+    return suite
+
+def main():
+    suite = additional_tests()
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
+
+if __name__ == '__main__':
+    import os
+    import sys
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+    main()
diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py
new file mode 100644
index 00000000..af6463d6
--- /dev/null
+++ b/simplejson/tests/test_check_circular.py
@@ -0,0 +1,30 @@
+from unittest import TestCase
+import simplejson as json
+
+def default_iterable(obj):
+    return list(obj)
+
+class TestCheckCircular(TestCase):
+    def test_circular_dict(self):
+        dct = {}
+        dct['a'] = dct
+        self.assertRaises(ValueError, json.dumps, dct)
+
+    def test_circular_list(self):
+        lst = []
+        lst.append(lst)
+        self.assertRaises(ValueError, json.dumps, lst)
+
+    def test_circular_composite(self):
+        dct2 = {}
+        dct2['a'] = []
+        dct2['a'].append(dct2)
+        self.assertRaises(ValueError, json.dumps, dct2)
+
+    def test_circular_default(self):
+        json.dumps([set()], default=default_iterable)
+        self.assertRaises(TypeError, json.dumps, [set()])
+
+    def test_circular_off_default(self):
+        json.dumps([set()], default=default_iterable, check_circular=False)
+        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
new file mode 100644
index 00000000..1cd701d4
--- /dev/null
+++ b/simplejson/tests/test_decode.py
@@ -0,0 +1,22 @@
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDecode(TestCase):
+    def test_decimal(self):
+        rval = json.loads('1.1', parse_float=decimal.Decimal)
+        self.assert_(isinstance(rval, decimal.Decimal))
+        self.assertEquals(rval, decimal.Decimal('1.1'))
+
+    def test_float(self):
+        rval = json.loads('1', parse_int=float)
+        self.assert_(isinstance(rval, float))
+        self.assertEquals(rval, 1.0)
+
+    def test_decoder_optimizations(self):
+        # Several optimizations were made that skip over calls to
+        # the whitespace regex, so this test is designed to try and
+        # exercise the uncommon cases. The array cases are already covered.
+ rval = json.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"}) diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py new file mode 100644 index 00000000..139e42bf --- /dev/null +++ b/simplejson/tests/test_default.py @@ -0,0 +1,9 @@ +from unittest import TestCase + +import simplejson as json + +class TestDefault(TestCase): + def test_default(self): + self.assertEquals( + json.dumps(type, default=repr), + json.dumps(repr(type))) diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py new file mode 100644 index 00000000..4de37cf4 --- /dev/null +++ b/simplejson/tests/test_dump.py @@ -0,0 +1,21 @@ +from unittest import TestCase +from cStringIO import StringIO + +import simplejson as json + +class TestDump(TestCase): + def test_dump(self): + sio = StringIO() + json.dump({}, sio) + self.assertEquals(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEquals(json.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEquals(json.dumps( + {True: False, False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEquals(json.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}') diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 index 00000000..7128495f --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,38 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + if not simplejson.encoder.c_encode_basestring_ascii: + return + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py new file mode 100644 index 00000000..002eea08 --- /dev/null +++ b/simplejson/tests/test_fail.py @@ -0,0 +1,76 @@ +from unittest import TestCase + +import simplejson as json 
+
+# Fri Dec 30 18:57:26 2005
+JSONDOCS = [
+    # http://json.org/JSON_checker/test/fail1.json
+    '"A JSON payload should be an object or array, not a string."',
+    # http://json.org/JSON_checker/test/fail2.json
+    '["Unclosed array"',
+    # http://json.org/JSON_checker/test/fail3.json
+    '{unquoted_key: "keys must be quoted}',
+    # http://json.org/JSON_checker/test/fail4.json
+    '["extra comma",]',
+    # http://json.org/JSON_checker/test/fail5.json
+    '["double extra comma",,]',
+    # http://json.org/JSON_checker/test/fail6.json
+    '[ , "<-- missing value"]',
+    # http://json.org/JSON_checker/test/fail7.json
+    '["Comma after the close"],',
+    # http://json.org/JSON_checker/test/fail8.json
+    '["Extra close"]]',
+    # http://json.org/JSON_checker/test/fail9.json
+    '{"Extra comma": true,}',
+    # http://json.org/JSON_checker/test/fail10.json
+    '{"Extra value after close": true} "misplaced quoted value"',
+    # http://json.org/JSON_checker/test/fail11.json
+    '{"Illegal expression": 1 + 2}',
+    # http://json.org/JSON_checker/test/fail12.json
+    '{"Illegal invocation": alert()}',
+    # http://json.org/JSON_checker/test/fail13.json
+    '{"Numbers cannot have leading zeroes": 013}',
+    # http://json.org/JSON_checker/test/fail14.json
+    '{"Numbers cannot be hex": 0x14}',
+    # http://json.org/JSON_checker/test/fail15.json
+    '["Illegal backslash escape: \\x15"]',
+    # http://json.org/JSON_checker/test/fail16.json
+    '["Illegal backslash escape: \\\'"]',
+    # http://json.org/JSON_checker/test/fail17.json
+    '["Illegal backslash escape: \\017"]',
+    # http://json.org/JSON_checker/test/fail18.json
+    '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
+    # http://json.org/JSON_checker/test/fail19.json
+    '{"Missing colon" null}',
+    # http://json.org/JSON_checker/test/fail20.json
+    '{"Double colon":: null}',
+    # http://json.org/JSON_checker/test/fail21.json
+    '{"Comma instead of colon", null}',
+    # http://json.org/JSON_checker/test/fail22.json
+    '["Colon instead of comma": false]',
+    # http://json.org/JSON_checker/test/fail23.json
+    '["Bad value", truth]',
+    # http://json.org/JSON_checker/test/fail24.json
+    "['single quote']",
+    # http://code.google.com/p/simplejson/issues/detail?id=3
+    u'["A\u001FZ control characters in string"]',
+]
+
+SKIPS = {
+    1: "why not have a string payload?",
+    18: "spec doesn't specify any nesting limitations",
+}
+
+class TestFail(TestCase):
+    def test_failures(self):
+        for idx, doc in enumerate(JSONDOCS):
+            idx = idx + 1
+            if idx in SKIPS:
+                json.loads(doc)
+                continue
+            try:
+                json.loads(doc)
+            except ValueError:
+                pass
+            else:
+                self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py
new file mode 100644
index 00000000..1a2b98a2
--- /dev/null
+++ b/simplejson/tests/test_float.py
@@ -0,0 +1,15 @@
+import math
+from unittest import TestCase
+
+import simplejson as json
+
+class TestFloat(TestCase):
+    def test_floats(self):
+        for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]:
+            self.assertEquals(float(json.dumps(num)), num)
+            self.assertEquals(json.loads(json.dumps(num)), num)
+
+    def test_ints(self):
+        for num in [1, 1L, 1<<32, 1<<64]:
+            self.assertEquals(json.dumps(num), str(num))
+            self.assertEquals(int(json.dumps(num)), num)
diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py
new file mode 100644
index 00000000..66e19b9e
--- /dev/null
+++ b/simplejson/tests/test_indent.py
@@ -0,0 +1,41 @@
+from unittest import TestCase
+
+import simplejson as json
+import textwrap
+
+class TestIndent(TestCase):
+    def test_indent(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ],
+          [
+            "whoops"
+          ],
+          [],
+          "d-shtaeou",
+          "d-nthiouh",
+          "i-vhbjkhnth",
+          {
+            "nifty": 87
+          },
+          {
+            "field": "yes",
+            "morefield": false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
new file mode 100644
index 00000000..c3d6302d
--- /dev/null
+++ b/simplejson/tests/test_pass1.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "": 23456789012E666,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[ ],
+        "object":{ },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+class TestPass1(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
+        try:
+            json.dumps(res, allow_nan=False)
+        except ValueError:
+            pass
+        else:
+            self.fail("23456789012E666 should be out of range")
diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py
new file mode 100644
index 00000000..de4ee00b
--- /dev/null
+++ b/simplejson/tests/test_pass2.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass2.json
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+class TestPass2(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py
new file mode 100644
index 00000000..f591aba9
--- /dev/null
+++ b/simplejson/tests/test_pass3.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass3.json
+JSON = r'''
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+    }
+}
+'''
+
+class TestPass3(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py
new file mode 100644
index 00000000..97422a66
--- /dev/null
+++ b/simplejson/tests/test_recursion.py
@@ -0,0 +1,67 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class JSONTestObject:
+    pass
+
+
+class RecursiveJSONEncoder(json.JSONEncoder):
+    recurse = False
+    def default(self, o):
+        if o is JSONTestObject:
+            if self.recurse:
+                return [JSONTestObject]
+            else:
+                return 'JSONTestObject'
+        return json.JSONEncoder.default(self, o)
+
+
+class TestRecursion(TestCase):
+    def test_listrecursion(self):
+        x = []
+        x.append(x)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on list recursion")
+        x = []
+        y = [x]
+        x.append(y)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on alternating list recursion")
+        y = []
+        x = [y, y]
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_dictrecursion(self):
+        x = {}
+        x["test"] = x
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on dict recursion")
+        x = {}
+        y = {"a": x, "b": x}
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_defaultrecursion(self):
+        enc = RecursiveJSONEncoder()
+        self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
+        enc.recurse = True
+        try:
+            enc.encode(JSONTestObject)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on default recursion")
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
new file mode 100644
index 00000000..b08dec71
--- /dev/null
+++ b/simplejson/tests/test_scanstring.py
@@ -0,0 +1,111 @@
+import sys
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+import simplejson.decoder
+
+class TestScanString(TestCase):
+    def test_py_scanstring(self):
+        self._test_scanstring(simplejson.decoder.py_scanstring)
+
+    def test_c_scanstring(self):
+        if not simplejson.decoder.c_scanstring:
+            return
+        self._test_scanstring(simplejson.decoder.c_scanstring)
+
+    def _test_scanstring(self, scanstring):
+        self.assertEquals(
+            scanstring('"z\\ud834\\udd20x"', 1, None, True),
+            (u'z\U0001d120x', 16))
+
+        if sys.maxunicode == 65535:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 6))
+        else:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 5))
+
+        self.assertEquals(
+            scanstring('"\\u007b"', 1, None, True),
+            (u'{', 8))
+
+        self.assertEquals(
+            scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
+            (u'A JSON payload should be an object or array, not a string.', 60))
+
+        self.assertEquals(
+            scanstring('["Unclosed array"', 2, None, True),
+            (u'Unclosed array', 17))
+
+        self.assertEquals(
+            scanstring('["extra comma",]', 2, None, True),
+            (u'extra comma', 14))
+
+        self.assertEquals(
+            scanstring('["double extra comma",,]', 2, None, True),
+            (u'double extra comma', 21))
+
+        self.assertEquals(
+            scanstring('["Comma after the close"],', 2, None, True),
+            (u'Comma after the close', 24))
+
+        self.assertEquals(
+            scanstring('["Extra close"]]', 2, None, True),
+            (u'Extra close', 14))
+
+        self.assertEquals(
+            scanstring('{"Extra comma": true,}', 2, None, True),
+            (u'Extra comma', 14))
+
+        self.assertEquals(
+            scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True),
+            (u'Extra value after close', 26))
+
+        self.assertEquals(
+            scanstring('{"Illegal expression": 1 + 2}', 2, None, True),
+            (u'Illegal expression', 21))
+
+        self.assertEquals(
+            scanstring('{"Illegal invocation": alert()}', 2, None, True),
+            (u'Illegal invocation', 21))
+
+        self.assertEquals(
+            scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True),
+            (u'Numbers cannot have leading zeroes', 37))
+
+        self.assertEquals(
+            scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True),
+            (u'Numbers cannot be hex', 24))
+
+        self.assertEquals(
+            scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True),
+            (u'Too deep', 30))
+
+        self.assertEquals(
+            scanstring('{"Missing colon" null}', 2, None, True),
+            (u'Missing colon', 16))
+
+        self.assertEquals(
+            scanstring('{"Double colon":: null}', 2, None, True),
+            (u'Double colon', 15))
+
+        self.assertEquals(
+            scanstring('{"Comma instead of colon", null}', 2, None, True),
+            (u'Comma instead of colon', 25))
+
+        self.assertEquals(
+            scanstring('["Colon instead of comma": false]', 2, None, True),
+            (u'Colon instead of comma', 25))
+
+        self.assertEquals(
+            scanstring('["Bad value", truth]', 2, None, True),
+            (u'Bad value', 12))
+
+    def test_issue3623(self):
+        self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
+                          "xxx")
+        self.assertRaises(UnicodeDecodeError,
+                          json.encoder.encode_basestring_ascii, "xx\xff")
diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py
new file mode 100644
index 00000000..8fa0dac6
--- /dev/null
+++ b/simplejson/tests/test_separators.py
@@ -0,0 +1,42 @@
+import textwrap
+from unittest import TestCase
+
+import simplejson as json
+
+
+class TestSeparators(TestCase):
+    def test_separators(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ] ,
+          [
+            "whoops"
+          ] ,
+          [] ,
+          "d-shtaeou" ,
+          "d-nthiouh" ,
+          "i-vhbjkhnth" ,
+          {
+            "nifty" : 87
+          } ,
+          {
+            "field" : "yes" ,
+            "morefield" : false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
new file mode 100644
index 00000000..6f4384a5
--- /dev/null
+++ b/simplejson/tests/test_unicode.py
@@ -0,0 +1,64 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class TestUnicode(TestCase):
+    def test_encoding1(self):
+        encoder = json.JSONEncoder(encoding='utf-8')
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        s = u.encode('utf-8')
+        ju = encoder.encode(u)
+        js = encoder.encode(s)
+        self.assertEquals(ju, js)
+
+    def test_encoding2(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        s = u.encode('utf-8')
+        ju = json.dumps(u, encoding='utf-8')
+        js = json.dumps(s, encoding='utf-8')
+        self.assertEquals(ju, js)
+
+    def test_encoding3(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps(u)
+        self.assertEquals(j, '"\\u03b1\\u03a9"')
+
+    def test_encoding4(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps([u])
+        self.assertEquals(j, '["\\u03b1\\u03a9"]')
'["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEquals(j, u'"%s"' % (u,)) + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u], ensure_ascii=False) + self.assertEquals(j, u'["%s"]' % (u,)) + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') + self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEquals(json.loads('"' + u + '"'), u) + self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + s = '"\\u%04x"' % (i,) + self.assertEquals(json.loads(s), u) + + def test_default_encoding(self): + self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEquals(type(json.loads(u'""')), unicode) + self.assertEquals(type(json.loads(u'"a"')), unicode) + self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) \ No newline at end of file diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 00000000..90443317 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,37 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +import sys +import simplejson + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + try: + obj = simplejson.load(infile) + except ValueError, e: + raise SystemExit(e) + simplejson.dump(obj, outfile, sort_keys=True, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif new file mode 100644 index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c GIT binary patch literal 10819 zcmb`NXHZjX->;L91QJksO9Fy4X^NnNiVC_BuprxlbVKhX^w84?z4u7CGf5evn_!Tr3?d(NECJ2Pu0AJ)uTvu54bx_-a^t*&`j>8i;TfD`Z)060EAmb_Eg z)wf#RL@#+W)ka%x?pW*}*_fIC+&j2FF}pLjxVO0SWp(}A+xH6{y(=4A-?w-72Szu) zd^`O1{b+b%YyaTK$DxhUsqNjpgDLv%hfiy@)VYJh9~E^o`Bf9b$IM!4PoLaT)mD=~ zFUJ4`006?j5qF#|Ok6F@g*p)Y6Z7*nj+PjJ@F5rmKRHY0xC_AA8$W@FCrO7u?zADA-VZ`x?tC|{EeZ$+PS}L}+>0Lu3YulS{ zT4!^6L+^W9RZs87mhra9j_Iz!%2vK2CI%TW6b5HzImN~dVJ!O2!OQxY?dQ)g-}ZOc zHbx@}1$YJ+?Kz}{t&;P~OkgsY^BS?Vr*pM7Zz+VGy zGfn8J#m3_yoX+b#a zDpjI8oCMB@qx33`Xw_VU0cm=BragGsS+$bevLq%hnO&KCcBH=3mDfvuPp-Rdj{6&R zHa8Kz$xJs|?B}2IQQUsp?)jr!!0=#iu`;qPU^}gDYqibg#q;S@SNLF|EZL!#x_2db)8GULs#w(5+~toZv=1YdVas3W&uFU4AzBgx@vEBz#;l z-pu9tZ)?LyrFGoG)7w(&W-(&|Hk`4;RUNspd=I*5q2ess%;1CmgIS<4v~VcVWaPQH zANrnVeu}B%aK0|an0w{9^-)|O9DS)D9?uwbcai9O^ScC@pw(ezOGyY?wF-n!Sm#Ke zZy3)y(6~018z!}0`0yn?_&rY+<>YT(f~^#{5m)wlacRx^dP!x6)JAFbi0ww11o>n& z{9EsPXXod0%vV-{ohI1IX%fgQW4U^o;-lR1D2o$i=iuqZ>VxzPB9Jhr87P6lwqYLL zzAmxh0lsOW7;-|8F98?s9psNE+IvPo(qi06Ws!-wc9~e{KMO$+^=Bn?AdpZ9E6B?F z1A=K_u-m%NuW@h!26eF-W1SeBa_##tKQP?qva-5}9POT49d`BGnqS=b_~meVq;S`! 
zgb_=kIvvv?GI&6n9GF3ujY-D@NP@D-pFf91Cy^u1oI&?!IJb}(xK)nDnXbl&f}s>; z;#;8si@QnEHk&J3gSNcYVjbRBWRIc( zvKzVb5N~!MFj(-4jL_9=*-X>+JQKFctTLloj)kA> z9G&XPDujY_f!g_$c1eN~62X^~Q5s<^MdWux&z|6ZtNq_x%Nv)pIs% z<@aqc7|CWG#B)lO27spFDxV{ zdYr!DyZ`O|uSUbIfsI>M$}+#ezuJX`W_;eY)V^^4y_63+2J*_W0U% zepJ^$z`V=Il-pRMH-O)ERX<>)`;978*j-52{u7^zX^6D`^EA)}O5Ge(0N%+LbVWZ3 z7{O(%Xc&Ih8OP#|MC*!1{kR+_8YV)k6N#2`rwV(Ym*O{JWIvY#0x~KwnK+*jILrpH zgsEGa3u@KiQ+VRWO+tOg#nRJmpN$nKR4-XirTUoGoH4@A*S|=2kmW7Na^0?<&XIR7 zOh~8kdrdpif%1ohd7+!_eVX}T-u z5(Z#qmf?p!n&=XXnBierie$(vu4-qu#{&x`ScFc29Zax*vO;;4wG^3Ly>pi2dQBUe zbn0P&ztrG*zPEG!kCk45uPjJ2c}{EBa&mFKYwyEbh)Y9wN5z&wFSW8F5PKN_dY3b9 z;`7C$R?X?Lu(C_}IaUiLI}_}`M=#WHpaA=gENGdp>e$%g{UTdV>#4Ev@4p1+=Eh|Y zro1mdCCr%)mL1?cSM=v5Lq9Hsj%wJ?9j;^ro=bC<80f*9MQpkSM;v zL4@!y?`T^T#xa7`-zO=YObN=sCp&rsK!P*uQ*+WYZ3q>q-g%jJaSkZK)98~=ryUBy z2k%0_Pmlq!a)q!m4GqDWdi$e#2Zx7ykyeufGs7^bb-%U6(h%L%cXMWRWg-Z<_=RaD z#O82mXO+I86OmbWX%C*m%p0DiNZOz0kJ1LV0C>PqrE_SsDg%>{>IpYgh~2Rqg2z5A zl6$I}Fc>=JX*Y)`iIH17<*mx=<%c2iX8DHQdzM=1%E1)FU8;FN(#6;jGWN^Mz6`Zg z&<)Aqf{`qahF9kBr`Ecq!X-Ie#WAW3V6YsIbQW-t+~{$LdQAb4@DSW*OFC&20rFy& z2lu6B?cSk=GAiD6KiHjZ_Y_mQlBneTpfipWpo2P4u;}B|Sh(zvl(O1bEWwtawu#vs zDoxa*pm3GTgXO|jcs36RP#Groh?);nb#dXA(O`Z1h~(A`%ekL<(?k`t@zKbxR*vAa ze{vyYUvs+mw~hW<#_t);DWKTB-sc_>zRKTrezuR!80~y^%k=lqqYE7S#8-7r8!^k* zu@c*hu!cZ3ZM2|$xRF&72sAaU>RIVf`2g2N9%~6ACNs@m=ChA#32CY?oRJsOQZWma zc%1tn;!=!N3wm^FY!rGnMkz1+0{;Ffh|Fz;WC!gr9iD1 zMbiLDHYx$kZmpCK5pN38^Ko=!}~{zBHD+P|(rL2v)lL1JiZcvF1V8IYn36ul$2* zPXmQA!6NM!`ns)A&3bQWMdGq~84qeH)gynuzVDvLTL$P*=d`W^bhACum14kqbeDh~ zp_|ifzbq4R#1?r$S+!ur`v(`>1hn)7e!bfAk@|=bIq{*e2U%I=tP_;ClNTT5apGYplxKjaubZEPcYqHo($AKNbHXsNM!Sbn zpuPlK5683&f*%7Glan49m06IMkdz#S^YTh8!&k>)Q5LLLGDaR42)y$IEGJ~K4n6_P zAQODB*FVbU)6jIUwO{nq^e7bh(SKoaEM{_GXoP8VZhQJOYKy0duT_T$K{jZrJ6 zfw0{Fa!$S6Q1uq;)Ke8XJcRIsl`4m)RxJ=gjG{p(&pB06gdp>A&;ucY1u;hA@7ub& z=)8wX@>Vws`(G7+)8uU?3}IYiq!ez$5SjI=<^Du*7r}U{i|A0&xq$EHhYH0#xytqD zzomRr?TY89xoC~YI5bFd*^bjJG7If(LTG|fBTfvJ%7Y=zzH|H1k()r@ou!h> z^c2FBs_5;*M>-Ul@f1okzm#+hBjTc!zu%o>fp8XrgZ zb5XdTUO|K~2l0yP-o+uH3!TxU&-#3XzQP71v?a&Cg6fBP7-@|Vyo7}s6H2Sq;U2fEjE zkDvm0ep{%LSKY)_pu9H|@MIe5iAgYjPT~W35#u0~vye{i^DsWa#1<&+X<|=^;C5lQ zvC7R3vpc9)xY6LLX${jrkI!8K%G_r?vWb#kb?{IwdLZh)@Jt)k!<`T;XvQ36!|x(V zi?#Ip6kOet_Q75*--oAbZC)NW6`Yh@Sd?4ao}73o;S*{qpj zdE1rZ@Zr1()!az|g|P~%@v5CXO^{^wIxodG^0?|%->BW?r@++ABGReRtzteY-tnA{ zqd+k@7@MV1m^)G@97TuKCtcQDW)hApI2*O&|3fM@EF2k?v=R7oyFjHyjE2ZICx8BC z?*5$vD%_%g2AuT}riLuc< z#TDg;X0vJ@E_Km@`d`2|0+puvSj8QxKo3Mhz5?&)bGr2ku-sVsc$fL%W%-zkrwSl< zHeNm$8jpY2H@p-tP+oyKvQkx(#ybswbIwySsZ2k~MU_I6Ni5HdOqI@bAp~`ma%jS* ztH6w0g7yo-tx|--@k4~tnBoOvqrN16SW zgW4D&i&oMCH@wLn4sL>_qU_#o5H|HKzTHwb0SPzB<=lHsSRr_%;GihDqkEyKoD` zNyxD^)TZOfkwRjILv>ZLWU$1oV|{xSc@Yvw_(_QQ_@3CkI0;TezVVX)6oq#;Ew$m0 z=wiI2DSrUtSfM4bC4hdtRR%JpNSTapCqPq(L~#UJK=Y{Zsyo(kuYdH+TQb>O(< zlqymlin{i7`BlJ$PUA-{ov{8{`3uIqUq8R&tlRl^UihlR5|EJ3kg^z+A^@#CYa! zQj!Ntzarvlw}!R_4x<+Q!1HOj#=QEugnA$Yn--geg{2oX17fHD`T@uwL=hO5vemwv zJg>L%GEK5UEeB^VZYHSm&{;AO{9#w|p_Nwa8%ufn0T?e{l0KsD{m8u^$PoMRFuGvg z6TWU7NL!IC%8XH$4Ek7h#N%{^M-Nyur|B`4J52DfD&f4QR9u=b&sS>JR<6HkIi!Kz zi2hR4$qW71Y*l)k+c|f%QPjA1?@azVYkdItQJ>N@S!yDA0?M~)vk=tW7X*wJ+SWaP zFHTE(KDs-)c(nXhvs!U|P<_IALv`qAO7q3$wW8oMO_*o|du}T5T|YvpA{SWFxZTmY z?Jdd7pn=ANeD{7x5FPYjGnwIU(x!T_g81S%{qLIA8R*Bsg3v1$(!^3Qt&p!?N{gm}IRq=E{aKwWGg%&?lZQ5u8=5M0ZLcR} zml9LnJ$$VI$(s4)xf(Xi(pkxBJ^acK&NECv1`US2+@K z!jbFm1H!$Ugs;zF`-Gab#;#&WcaE2pH}SUSP_j411qlldfsuT?ph3a0;kYPrG^<-! 
zLVyoLN@{$P&*?08+pvf{EB~-!R7O$^-aUfi?Gj?Ij{6TvTu7|te^AoX!-VXP3x?2` zMn?OyLt)*cGvh_E(=#Leb45doOP@k*yW#8ojc!A`i&NQ!$hFVV)IruEVXA~|s)~#K zW|#=0))OED2eeX%PM9Du;|2fYD2#s`MN-vFJYgeoQSM1` z-njF^horN>V0+ImOX?}2_jzu>o{HU1d|g z@gsXTU!(Y5E4HPv-`7phyJOMpBzEqK!^h9ymf{MNLhbC$zx>ZKhm!3~hyhk0k1s}S zdg)~NTVGDrh^g{`rI4mx_sJi!U+o&iL^CVXD&MSOyl-77Z<%x&8f7-PerpP2JlH7v zbpJ+_vn!-^>vm?^^d=kx71ajXz_P@Xyh3Ef0mmd ze3o=dp2bKK#@)s`Qi16&r8%&GD^>`!)PkD}i6r12dWu}LI_n@2;uKNu^1b9`B*AwFL-4nvGGQJ0!6bA#Xq zK^qYUHL0p@cdyhD^vOm_6_EE6p)J0v2^*=DWBG(d11~rQlYT@~FKq~z>q?1KiGkWT z_EXg5!b&dpAX{3*HL7Z5ScC`w#)IyPI1W@)RuEZybk(ZiqV{qoCmM@y4=Z|*?jw6& zXAEEdkgeK=haXram+vgt!jDlU<%oXf= zjd?VxL3|vAs)Qk0@Or6x9Y<{?h(R0+fNcFrwGQF#ku92mTo50adK*xj5Z3TZKq)EY zK2^yuC{jv>?nzt0%}0NftyG?2zoUE5>$rA9$|puKgZgf<2YLVov3$A0VX#>KWBVVD zJazs1{h##nha(mLO+T4zpnt7S2>`XUNnYY0<9(Xq9~MKjgeF7<$9SOvtb;r}VNMCor~N%bb0QN13qnG$C3!*FF;$er z0!O5}1rS1^rm}!Y!W|U4;qUYmtTUqT6 zaah`#bPO5(0u3$PU+F*kzTB*uDjGgpi`^Le=7KK317RoatDME=Z9y39{{aeDGM(5F zAqYGU7qxK~xpx8-v|;e)#Sck*oTWWubg>?@YgQ?nhkGIq(`9`-SnfS;)y_O~Az~5s zY*|a0H7LhiTWEz=pva8sPY%St2SQ~`bwmG@C!F)Nbo@#xs201T)ulbUZP4W8meIzX zR8F>obJYG~=6hrcL2z)6d{AtWhcJmMjBF?wEcQAd3f>LV16tP;71XtSyej&J$4(Mf zc=l}w%dQ+f{( zPSwy0?^F8H6Ee5eE=*xxR~n_f6y_O+UKH%k4Md=|(QroNEIJV>*PmzDZDO4X)q89` z7T()cAci)&+rg5=q9S4FY$qS9l6uq*kx;lP4oHNwe@!5E)^H{d*TF05y$AXVu6<2 zf}g~IJ}?`oRcly2j+Zd2U^f=AFBT}BD6VQeo4OfXmcl4qO!Eq)9{ap6vQ0MX%&=-H z4f!Rp4r6o=XfXl-))vh!6eB01jKwV%8%NbB!eO-o8phH5EREBYE8D5L4aYTO>(rWZ zyUf6=6SR`3m(!zip_})DDzR;F@wq1O26Ed>D#ET6z`=x7f#8-5R-2PALXGls)fxf$ ztrlJY3WK=5OCCq5yO^KVi-&0Ge06oP}- zG~e`&j`7ueaPjq;HdRnRVF}+eEERY`{rQXbF2 zssTI7ci6=Nnu?bOc=Sj9@ z9s{y1Dpm0x((Ve_=f-!=Lc4}7CVV-ZzYCGeEe?cDdad!oRwY%8yc7Xfz$9tEJFo@H zpQV9NV3)$?;CPhy&_w^<;T+26GF>xUZjdG#4(56S4Biu?;zRB9RM;_aTcIVgtr34V z(V)-brMXy40u{8TCubSlYfl9vCQ6bPOs(Kn5^8(#tK#t=%k;u6Bb*8>vAEl7EDXai zb@mAVl!N>fQpD8)pTd`cR=Eh7U=YyFa6HLgmXgBb32c2vD4a4{IaT=a?@p?hqa-~; zU$$8)JxX8<&RSBvoRVk26(F@)l3yiJ0&b<;+l=hgk1DOmXlLYbYinJ9etfyN{+wai z2jA!6`4X2TxuEnPA}Oa78s3HJU(R(V$|Lh?CMGC zsG4<=l)!dze4Vz%ve;wm=p1anG36pPyq+Jz88yliqFE7XkTE6{`ucoKi3KitJigp= z=0@teVqPw2CIj%U`h+KSEC11wKRo&0>g0d5(EHD zUnYI16H34#y~)1*1R^UX%+Ji-BNRr7^&>=`Bp!yKM4S&984(3d4uDzN;qCH$66}Is zdd3DsRE0QsK^V;c!3l_i^>hm61Sm|1-d-d$yBgVj5_6bZQap#pMkcV;(=#n$7}&zt z(p=XGPCj`Jtc`9Au5Rxv?PY@w7eCz&$YG3}y-H*X;s3)Epb4CduP#fSs0NEgsEl5fJ6E7xf#wA@dUw=)&B*)yj_$#CwHVJ#y4QFl!8 z!IB(ny|Gtq^dG?8#5+Pb3Z28Go=Ma6U`$F}St`pA7CyGT@N@svB?=E-<lhO5|E@f1c-g)%4-6as`B$nM3B0$`o#9 zb+F`&Vnn)Q({ktR-3z}2cE4=5a%=Sa8F?)?z%P}0uuS{@+>6Mqh(LKZyVZ@gkMe@P zTvcqr#eDuV6Xvk41Va0!0#!i~O$h+I50y2lIInn~7v@OBx#xfV9KLJ?N^8= zO3Eb##*8pg@t!>BYJUImx^FMgr!0BQl9wOiqZP~tmeZUtt^EAPR+MB*a$2&@l#647 zm6etMdi^-w`K(8phTIhrK5D30>|ExHJ)F*LH{s)GCcloUGk?r`F~>0e}+8&W_xm?DZA%b zl_IKzw|2R%>>e!llzR&Xzx9i_a&XLmgCRZ1C|`d+0x5tL=w?qKhr=T6@x-`De=}%k z7#8N57?K14nQKXb#5 zPjnMOB?9Ci%zQ$BCdKWc9DVvbH8c5qNg%b1FcF z2of7wB}+xM|DUiU^q6qD7DVq$k5Uz#wB_SkoJNlB=r`Ika$cG0DML4TZ%}kAxBAwhl`K(e;;AnS&v+`X@ zdgZn62e02{+TFOabTrw%GV(`LfDG&^m4suntv6y+7hemV3=$h{lt+Q9Reh3pSoM+$ z<=5>HRuCi>3cp}B&0?`*)wj&H6uVuA_73;un>XgqI~bK0hX&;s^z8DPG75CXFZjgy zYUnbu6Yrwo(6iR;!8sSRP)<*XBUu0)4oi0L-$jEjBQtIE9)i4r6;&Da$_sm-X3#P5 zB!noH>N@nMJ`9R=ELe^kD|(H`;nsuk^6p2HiE1ehl+`TU`9zD$K^>CMlM%|nL;^EY zLb46)sfwtNfZT4*<+Fzxc}0EE`nIluet|QEpQo32aJl7hyYSi$HEGv4UkPAzeKXC@ zfQQ{6ue>CHr})+TS2P_`>$wf-10^G#G<$^B*#~F5=j1-v#5l53iNE7nvvm)zSL3YcQq`!7K5@OdZe^d>PEMBXgo8Sj2hPG z6GL8=rq~emG*JoanRhRBKVUEv8@f^wU_LL_7_0j literal 0 HcmV?d00001 diff --git a/static/favicon.ico b/static/favicon.ico new file mode 100644 index 
0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8 GIT binary patch literal 21792 zcmeHv2UwKH^Z&l}`q2ag8^(eim7pLN>;*+)iBgRs8VhO=>2e36#%@FrrA3LTNz^C? zMVis5!2-qxh=LspNE9QSl>5!zduQcPN%`ja|Jlb6yKmXq*?DJoXJ_X`h(HX&i9lw^ zR0pBYq1?SYcl~91gciU(Gc)eGgC0V8RtPn1%3U88z{|rC($!VJPa|~L5up{14#Oov zQz1N+j2tm!9Da(yXCZU--ZcXu>CKTt22SuNN@KlNyG}GYm1E;K!?)^yTPKTIXI70F z^-UgWI-;%qFFH>@xo^Dm*I#wEMnyGESP^15Y*rhCJl^uU$&5n}va?G{O1zYr(Iwf}wglz$%P9+ZT$XV*-Ln6V zJ59|HF@_vyId}UqME4|UyB2nKQHa!a=)Y|+A{;4Q-S%=69x?X5t{u9B=x~7k&z{q>evb`!UXIQN-_%iw8 z9Odkvo%bbmujyp)N5?-motgMJGPpe8uIrlgq?i(GbmX$4m$%Cr;^1(DIYSWfqh93> zqtux>eqIK)hc2~UoL(I5u>0_$!gZ52Jesv|?!6IL61Et08`Cmr$4uM1?6CAhi7{To z$0ZabPS1(!*NZoJ3*SuM)6*lvqOj_Rn1UxufAK3lYMI?4bN-=)MFE)$bDv!ZanBkn zJUXU2Cxt4ru6S6G-EXE>TF)Yf{xPl>oJ+i(4+^%v=q>%iIwr{ZbfToyk=emDEsE~X zO7tAv*G*O%y23!`n%|s7cMK*iz4ap7>4;Z)`N4o6;+1J*g7Z#1$n^+(eEE|9ow6Q< z%CaU?nlIY>lLsjk^!(9sx*cja`qJ66`?CyczbL%up8rK^aiQXQO8A5P6@wM@?IAt} z%Lo5n=v+W=vrd^8?;GHxo9ulm&3JtUJGVks<50Y>N^YN9`t^gv;U2fmb55R2PubJ= z`mAQR){N7$Lpw&D%F6bi)U5Nds$P2!&dJKx3o86-zRtES^Ib-YTy7Sm>`VQ0k#!1H zn&q(gme;MTeTuX5OKO(gNRK`caVnzQm}1{9kRY)=GYq(zV-~> z;YNFw#LSMLar>8<<~v(mO}bDyywm3EpLNVGxV$I!(6r#2p(n2$ij2xGSn2LkR$~(# zn|Wuu?%uR(%bOhvU+~L!vDLOyPu2QP`|5b<&{CecYt8+O`=2~&clFjDePFxYs9`^J zUgKZ6WA}@qj9k;QiXN3uO3&yW4PVGpese26wVQb>UY_HcdC#6YUi0_rlu`MOWAKct zzKYVL1K6^awksd5D+xYo$r~SdxqWPWf=x5!s1b8DQ(+s#U-h@Nlp{eZNf zy&-2ty~s{G-uk@NgX0fad#4d*C+?V@J3|;}6h4~ld?o8}+O4bMvF4&kS4X4J8pH9~ zEfiiJ&mS4*pNiZZerxO87Hj)wpW=c|lnx{TGfAL{*xYoo^Nz*?sO( zof*4#7g^oQn6=N!Gh&LR;5(O&UvctqaMt<$D&U};%DXk?A%%>v6eDEfZ4HEwNv`8}d1=5T>S`Pdn=PIm!R_ z$1VFqPWY7Uxos({nU(4GE`^mYqr6Xn&^j z!BU~`9wxe*j-qqs!{p?gH9?uB3y1z0CrU5cVnLiut@X{H_T&qX=X*W6wV3U2E_0m0 z{jJZ&Rm5dW!tFA`d(S@?G;8i=okQ0hCu9UJ>d=2yMf2VVhWr?HtTr!i&H$;mU(5KS z&i8jXCAI(?tNZjDo~|*A{VEyT;L5xQFCr(+Nd4hUpI@uiD3%XQW-@~|Tg)qXc-*Mq zLhln(%Z!JQ3oOn!d2Le3h0O8ugnir!(lU~1-J@5ZTAa9GA-=gXr|PQ3*xW72D5l`c z0g~jD88cCdbRCTMWQ$iF8XsI9QnhkH)o1H>=DmntJ6+*E9Dhj^v~g}(e`Vav zIltY0zOwW{LB#V%R>iqq<&O_snXsx=d49SgbGqO2(FHld!&9Hktcfa5$STZlR@^e+ zyB>bI_8n_m7u7}=Ou5G1zS~xgMn!FQKKJ=PLfoc}$L)m63$NNZOue0G;h|GHDR$dm+;%%GkOve&$!h`kTr16TKXlBO~Yjt>?@3Y@1be&q5IsyzkE! 
zPg{8{{N_;8#a>qIqIn*QbkEcCV9M(~C`~6cDM}uD$y1K#31CxSK5NjMUX93a#b#!s z5y3Z)Ki^u8b`gBOfgL)QW$wc_L$eWijMkMecq(W?^5v#-d3S;~iwPdI9ICO6)fz?e zAJmc?%J9^Atxc-V1jC*ejU4JQWbdFa!a6`1HrMD20N!zHOG>2@HKI`ljXKb%1OE#h zP$(4Wui%A3K>{cZfEK~w1*K4Y6i9d}zqTe3Q9czOlVrmE&@Kh-fR9uWK%oFA>UQfxr(&Fp|qd zNx3|X=J=Bnb@8cfUHgFhBz~tQLZJf@Iz9lQj6Mkc0=GILq(2NHm*WUIWpKGOlqVn4 zj!8!mnwWx+!)}CZ*MZ;K73i1(a3TPl>sxjC<{y@2QA5CESrXTqpwnbexM53Wm|-(z zph0V;xq!7{`9s|GB|DTllJAuIlATIjNg@E}^%{K!wUiX zO_hM%P^}#sl>#6@RX(|Za{>{062pt3H&YomZaf-6kn3y zD=|_olo-62CvK`RqrhuV+B%8ZNF|@$SSbX)SSBEcriKO3)aWHNHA+TP_MrgzD>$xX zDAzb1voM*@tiC5;p&#t}XHd5mml@hA8dQi6B7<{}zb9$k zhjaWgU&^Ds(gkcZ^kE&y0rZG+*~;~hd*|QdAIl#nN~6Efm->>r4}@tq;QdblyRk~h zZmr=nVRs1Bv?Is2HeIZb?yF}XCx7P{t|q0K$MM{S`=AEa0}WL7;1SnXmiG7o>p#q8 z&+#{)i6M(PS}Z=bY;gLE{A0egCG|7aIH5?Q<8U?qFc&A(GSswPoSwb>mfGmld;cHg zU)?U|A18q>pIP?|<{?-=wkUZ_@MVMy+i}lc8}l%rpaKJ+KLjvYu=ssE|9`;z^S1uf zCx4E$mw!o`r52u2t#7J6tg9f%_Jg^{G^`1KEe++1qyItvp`QfFQcls_T)Gx?C?3{= zo|8DPS)oP`j7ZwDFDqQpn-v7wutK>tD_jf+(SpVBj;uE;40IFep7-I_ud2D+`c>Eb z-%$Chz7wp|VFES|#_;MgA-l7dBKyu%^Dn|WfXm%k!Q3z%W91^Lp)yEnr1S&$zXt*E ze38TuYzLM7Q1kcm&y4|^8tN2&gA=0}tU8I+*4e&e< zaCx4%$1B_7&OZ)tW=YFsy zo@2X|B)d;g^Z#-U#(G2(gb*NjA1HW-0V9zl2-co2Nz}-{9{aNa@_)I0>G9dMU;}M} zy}+g#0lVr6Nw(?7@vpJtK3@Ksd3VGs`@cT>`6Kw(*u*OPfh5U}BRJWE9nFiag+0y* z2-Y&@I9Bx$S0L4(Ab&5Qu4=8uG+_Vo-G|GcR?XFTO?qXoY3#Pwb4;1D^%#1(%6=2D zYs!VZ1hAneZNg7MUH;PA3;l$D2z{XZKgNCrx_F;}h1$Xr@1X^-9|jv5_F)^IkVH2J z{7l0?2!g`8{4v1ege3?Gi~}C%|IYWz-!+btH;ebMU^-#hV*61ATn6C%Er!O%=IgNZ z{7h_na^p5m$4eLHoV>hh~w1fl;4 z-fQtyd#?zEkllo3|0hXw8o||h#XrbVSD&Q18hjkOG$E;OP_MHCU%E%bzxMeb%OCFz z1rTG2=WHxPo>&dI7U&R!PBW4)?M{=nU-D?5OqDN%-3WW%h)N+ZPDxV^Uvs>_w3$^M zZb!4@W{3^Wd43$u;yTmC2B#N@o0Q@8*GFPd{VxAlcALR}5|YPI6C?0Go}ng3!QOl; z_DQG#W%unY?Ue%i!yMpOHOC8h>tW6WpAg>n(r()k;zQpx`*zjXQ}N%G5qJbijcWp= z##I2ESAQr1;awl8;fwhaqZbP#1{Jf!O`m}O!4mvelf>Y&;DqByXj)&Z!g;Lf) zENRk0xwJ`3Wq6Zj%Ebm|%8o*|5B47#sQiZsNP_8 zaNdtXzV&35<*EFXEGwX){En{^dSe3)*T{wfoB@#?5ZVNYXp7JWpw|L|eg`5n_7LD8 zLXMDQsIIl-4#^1F??q@JjPovwVSTa(I-oDAgk`Q?H@G=leP5M@fd4#57Sdn@L)}-9 zAxuvuUnY{v)-`(z&EZ-L)|$#?EA+6>Tqg4Y_yIz&|C|Eqnzp z8s*Zc1An6qyuOw-mK!DTu@V5g3z-4z0EDUpg*sB>I#;K?^RLfkYJ2H@@hzH|8XOM; z@2cz|ZSgeq#?1Un-yPeE*lv8UJnM=fdHdd~&`{IJuq8cLhlV!C&qC!J+lI=wb`6zp z>GCF`I?3hh;hg}mxBbE1*4Wx$!>i+J-sY)}q4CZw2v(S5hfuaLLTW#;T|C61WC4DI z7?jsA=>$TflMu364SRXDPmqhN&=7sQ{kmc}-o7_PthXwpNps~$ss2X%Oeze!q$}C3 z+TY=rl2_OT_YMH|Dy5<&xdOiBSO`mbV~_WC`x?IL%lojxCEjojF@Pt6enHP@A2|Pt zsAj0oH&SGu>95dl7ERiR@tD91LSDR*&nzz{2!qzI`-K|PzAYHMK z!S|{pi1x6rd^eHAC$>EBQQ*Bj_!_=_{f)n~eUj)gk@JtK{AX&v8Fvm&5?viIkKl8t zB#_uh!^7X(zQt!6x^L#3Pl(4X&IMm1oNx4=qIs^ZeF8e74R8*)tS_p<7q~q2DB&clOi<@JYrtqR*8M9WWc`+R17WqtD3Jkyr9M-?E zy5tY*&!D>M5JYPmSOd>ve=_*N(_Vk)ZTZ8Yi*O)MyrDig3w3|{N@Dl|*56f_7LQ&8 z=cf_X5K9?Bk=9dR0k6MfmhzZ@^W6Ho`VW$5hJSW~zo{y{-M%i%Gg@ubN?9M4<9Jr^ zOWRBJw{Z4H=tW)ecBRhSP{p(ESKA-UuM7CLE-XvD&;Ag1LnyI6SRk~k2SPtc5mK*X zgA*VQIQv~-n`{jDj(e?zDQ$?l%PM_Pm*3GpxlBeZlgo8J1WROcK4*VMnumtQ^aS(> z4UK*U`Bn?L{F_!>ytJRn-z$^JU)r9y?%VSJhps=^{>J+Wb<95AW7G?H7yEV9`;RKv zf7FHgWA^`{{{CBX{ePw%&3O3#HOGDu_|I%qzsYO#le~YQu5H}br#}=Tc$N%*K|fZq znol4hN%V4p*uyO}^%=z1k6+7B<5n}&XR9C<-=BcERzh_ijwxzlZM0HXt!E4(bex2e z%sJwwB{~Gsr>JS$MEIPRje|4t1f`I-9f0%i;kE{AT+Dx(WL*QSRqNc>bS+f!m&0slvGhrKsM`N2}6T~}Wy9Xy6L!dnYeepzr zBz1<^_yO<_^xdN|eXxz-=QR}6#jyuCW{D5+2YA0^K2*ql4}EZo;;6pt1GdRD`$HTX zgv%rg^2huG{fC@<++#TUX1%!jn(xTjg9MToK}^669PoU2ejd%jrznTN>e zmM-Wg3Gx@I^nHUEH?}nGyc}Pk z{~GaFpG zV|(!U>M~l$?uPN@7LTtrI>nXW{rj5!KjP$r=Lfu>G41m9+XT_3ACHk2aQ+@kC+<1T z7ZIe%1c6TKIDt;`c!5p|;IJAg69l?P=8E-8K|gPD`subs1pL#KTO3U~EK`AUTmio^ 
z_@2)!DT3b{z#QtL=ADam)FY_&GSE%X&zi-Mt5F*QWr*Xc4UjfbdWZ}$eHbQKKcm4X z76oxk_%~Jf_dIbBXS1zVz-)n7zR8<4bCIThcz@J}V#a(zG4_DbZ-O1&o@U0GQG9#w zVUPsbb3CjmYjJ!yO^pdx{l1H|52eU{vk8RfRMi*;9l_HXm$a>GjrTd5KqIIBN{$C5 z4!$x2{Fm)EU=Jsio`D_2#~0?b(9J=>+fERztOQKRRhVb4@tLK!0JkBQGKC5`Ok_0$YZ0DrX=;c+p9#$sFkcrzY#RP;r6y_W z3-ApKMzX+F$fiR5;R0sKEugH9+f=R31fTi(HbaeFs;P|UIVj_GUW)JaWF)>*gzQlq zHz{DSPgBPfOaY)Fg`*L$XQ*Z%T{FxFLcIU0BXcNm6a+BNZY z{aMkf86y2Vu%`a*Ce%3xcHW=PLiP|%cD4szp}r8S0wk^>s^F~7hHtw5sK_I3SvMJ(*k$AS%&NzlenDI=4?{qZs&8V+N&-0fbq4vZC;=aZ>OTH$Am)Oh++xAE za{`y4VF->UBqSmrNSADa?}7aNUVXo1_ra_I`;F5TEIWu-1iPV^3x2mH#SHcZpRSk0 zupEBR3%S7`{!&w=yO5Vk6CFn3I9?tde2GVgv$f97};1{7vw=p2(ndgMi5}h z2o_i|LSL}s=UXwNfc6yp- Date: Thu, 10 Mar 2011 13:00:11 -0600 Subject: [PATCH 110/482] First version with background queue processing and support for stories >1M. --- ffstorage.py | 22 ++++++- index.yaml | 11 ++++ main.py | 152 ++++++++++++++++++++++++++--------------------- queue.yaml | 4 +- recent.html | 17 +++++- status.html | 86 +++++++++++++++++++++++++++ utils/remover.py | 19 +++--- 7 files changed, 228 insertions(+), 83 deletions(-) create mode 100644 status.html diff --git a/ffstorage.py b/ffstorage.py index 8b53013c..df1e6ff9 100644 --- a/ffstorage.py +++ b/ffstorage.py @@ -4,8 +4,8 @@ class OneDownload(db.Model): user = db.UserProperty() url = db.StringProperty() format = db.StringProperty() - #login = db.StringProperty() - #password = db.StringProperty() + login = db.StringProperty() + password = db.StringProperty() failure = db.StringProperty() date = db.DateTimeProperty(auto_now_add=True) @@ -19,3 +19,21 @@ class DownloadedFanfic(db.Model): blob = db.BlobProperty() mac = db.StringProperty() cleared = db.BooleanProperty(default=False) + +class DownloadMeta(db.Model): + user = db.UserProperty() + url = db.StringProperty() + name = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + format = db.StringProperty() + failure = db.StringProperty() + completed = db.BooleanProperty(default=False) + date = db.DateTimeProperty(auto_now_add=True) + # data_chunks is implicit from DownloadData def. + +class DownloadData(db.Model): + download = db.ReferenceProperty(DownloadMeta, + collection_name='data_chunks') + blob = db.BlobProperty() + index = db.IntegerProperty() diff --git a/index.yaml b/index.yaml index 2b67374d..16bcaefe 100644 --- a/index.yaml +++ b/index.yaml @@ -10,6 +10,17 @@ indexes: # automatically uploaded to the admin console when you next deploy # your application using appcfg.py. +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + - kind: DownloadedFanfic properties: - name: cleared diff --git a/main.py b/main.py index 8559ba38..85d5f937 100644 --- a/main.py +++ b/main.py @@ -41,8 +41,6 @@ from fanficdownloader.zipdir import * from ffstorage import * - - class LoginRequired(webapp.RequestHandler): def get(self): user = users.get_current_user() @@ -100,6 +98,8 @@ class FileServer(webapp.RequestHandler): key = db.Key(fileId) fanfic = db.get(key) + + # check for completed & failure. 
name = fanfic.name.encode('utf-8') @@ -119,20 +119,40 @@ class FileServer(webapp.RequestHandler): elif fanfic.format == 'mobi': self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' - - - self.response.out.write(fanfic.blob) + data = DownloadData.all().filter("download =", fanfic).order("index") + for datum in data: + self.response.out.write(datum.blob) + +class FileStatusServer(webapp.RequestHandler): + def get(self): + logging.info("Status id: %s" % id) + user = users.get_current_user() + if not user: + self.redirect('/login') + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + key = db.Key(fileId) + fic = db.get(key) + + logging.info("Status url: %s" % fic.url) + + template_values = dict(fic = fic, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + class RecentFilesServer(webapp.RequestHandler): def get(self): user = users.get_current_user() if not user: self.redirect('/login') -# fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1 and cleared = :2", user) - q = DownloadedFanfic.all() - q.filter('user =', user) - q.filter('cleared =', False) + q = DownloadMeta.all() + q.filter('user =', user).order('-date') fics = q.fetch(100) template_values = dict(fics = fics, nickname = user.nickname()) @@ -164,8 +184,24 @@ class FanfictionDownloader(webapp.RequestHandler): login = self.request.get('login') password = self.request.get('password') - logging.info("Downloading: " + url) + logging.info("Queuing Download: " + url) + # use existing record if available. + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is None or len(q) < 1 ): + download = DownloadMeta() + else: + download = q[0] + download.completed=False + for c in download.data_chunks: + c.delete() + + download.user = user + download.url = url + download.format = format + download.put() + + taskqueue.add(url='/fdowntask', queue_name="download", params={'format':format, @@ -174,7 +210,9 @@ class FanfictionDownloader(webapp.RequestHandler): 'password':password, 'user':user.email()}) - self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=Check recent in a bit for the download.' ) + logging.info("enqueued download key: " + str(download.key())) + self.redirect('/status?id='+str(download.key())) + return @@ -191,25 +229,32 @@ class FanfictionDownloaderTask(webapp.RequestHandler): def post(self): logging.getLogger().setLevel(logging.DEBUG) - format = self.request.get('format') url = self.request.get('url') login = self.request.get('login') password = self.request.get('password') # User object can't pass, just email address - user = user = users.User(self.request.get('user')) + user = users.User(self.request.get('user')) logging.info("Downloading: " + url + " for user: "+user.nickname()) adapter = None writerClass = None - download = OneDownload() + # use existing record if available. 
+ q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is None or len(q) < 1 ): + download = DownloadMeta() + else: + download = q[0] + download.completed=False + for c in download.data_chunks: + c.delete() + download.user = user download.url = url - #download.login = login - #download.password = password download.format = format + download.put() logging.info('Creating adapter...') try: @@ -233,14 +278,13 @@ class FanfictionDownloaderTask(webapp.RequestHandler): adapter = mediaminer.MediaMiner(url) else: logging.debug("Bad URL detected") - self.redirect('/?error=bad_url&url=' + urlEscape(url) ) + download.failure = url +" is not a valid story URL." + download.put() return except Exception, e: logging.exception(e) download.failure = "Adapter was not created: " + str(e) download.put() - - self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=' + urlEscape(str(traceback.format_exc())) ) return logging.info('Created an adaper: %s' % adapter) @@ -277,67 +321,38 @@ class FanfictionDownloaderTask(webapp.RequestHandler): logging.exception(e) download.failure = 'Login problem detected' download.put() - - self.redirect('/?error=login_required&url=' + urlEscape(url)) return - except: - e = sys.exc_info()[0] - + except Exception, e: logging.exception(e) - download.failure = 'Some exception happened in downloader: ' + str(e) + download.failure = 'Some exception happened in downloader: ' + str(e) download.put() - - self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=' + urlEscape(str(traceback.format_exc())) ) return if data == None: if loader.badLogin: logging.debug("Bad login detected") - - download.failure = 'Login problem detected' + download.failure = 'Login failed' download.put() - - self.redirect('/?error=login_required&url=' + urlEscape(url)) - else: - fic = DownloadedFanfic() - fic.user = user - fic.url = url - fic.format = format - fic.name = self._printableVersion(adapter.getOutputName()) - fic.author = self._printableVersion(adapter.getAuthorName()) - if( len(data)<1024*1000 ): - fic.blob = data - else: - logging.debug("Long file, split required") - fic.blob = data[:1024*1000] - -# try: - fic.put() - key = fic.key() + return + download.failure = 'No data returned by adaptor' download.put() -# self.redirect('/?file='+str(key)+'&name=' + urlEscape(fic.name) + '&author=' + urlEscape(fic.author)) - + else: + download.name = self._printableVersion(adapter.getOutputName()) + download.title = self._printableVersion(adapter.getStoryName()) + download.author = self._printableVersion(adapter.getAuthorName()) + download.put() + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=data[:1024*1000]).put() + index += 1 + data = data[1024*1000:] + download.completed=True + download.put() + logging.info("Download finished OK") - self.response.clear() - self.response.set_status(200) return - # except Exception, e: - # logging.exception(e) - # # it was too large, won't save it - # name = str(makeAcceptableFilename(adapter.getStoryName())) - # if format == 'epub': - # self.response.headers['Content-Type'] = 'application/epub+zip' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.epub' - # elif format == 'html': - # self.response.headers['Content-Type'] = 'application/zip' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.html.zip' - # elif format == 'text': - # self.response.headers['Content-Type'] = 'application/zip' - # 
self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.txt.zip' - # elif format == 'mobi': - # self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' - # self.response.out.write(data) def toPercentDecimal(match): "Return the %decimal number for the character for url escaping" @@ -354,6 +369,7 @@ def main(): ('/fdowntask', FanfictionDownloaderTask), ('/fdown', FanfictionDownloader), ('/file', FileServer), + ('/status', FileStatusServer), ('/recent', RecentFilesServer), ('/r2d2', RecentAllFilesServer), ('/login', LoginRequired)], diff --git a/queue.yaml b/queue.yaml index 0bfb85d0..2acafa27 100644 --- a/queue.yaml +++ b/queue.yaml @@ -2,4 +2,6 @@ queue: - name: default rate: 1/s - name: download - rate: 10/s \ No newline at end of file + rate: 10/s + retry_parameters: + task_retry_limit: 3 diff --git a/recent.html b/recent.html index 1b199e5e..dbe04fab 100644 --- a/recent.html +++ b/recent.html @@ -32,13 +32,26 @@
    - Hi, {{ nickname }}! These fanfics you've downloaded previously. + Hi, {{ nickname }}! These are the fanfics you've recently requested.
    {% for fic in fics %} -

    {{ fic.name }} by {{ fic.author }} ({{ fic.format }})
    {{ fic.url }}

    +

    + {% if fic.completed %} + {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})
    + {% endif %} + {% if fic.failure %} +

    {{ fic.failure }}
    + {% endif %} + {% if not fic.completed and not fic.failure %} + Request Processing...
    + {% endif %} + {{ fic.url }} ({{ fic.format }}) + +

    {% endfor %}
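For reference, the core mechanism this patch introduces is the chunked-blob scheme in the main.py hunk above: a story too large for a single datastore property is sliced into ordered DownloadData rows hung off one DownloadMeta, then streamed back in index order by FileServer. A minimal standalone sketch of that round-trip, assuming the models from the ffstorage.py hunk above (the helper names are illustrative, not part of the patch):

    from ffstorage import DownloadMeta, DownloadData

    CHUNK = 1024 * 1000  # this patch's slice size; a later patch tightens it to 1,000,000

    def save_in_chunks(download, data):
        # Slice the payload into ordered rows small enough for a BlobProperty.
        index = 0
        while len(data) > 0:
            DownloadData(download=download, index=index, blob=data[:CHUNK]).put()
            index += 1
            data = data[CHUNK:]
        download.completed = True
        download.put()

    def reassemble(download):
        # Stream the rows back in insertion order, as FileServer does.
        chunks = DownloadData.all().filter("download =", download).order("index")
        return ''.join(chunk.blob for chunk in chunks)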
    diff --git a/status.html b/status.html new file mode 100644 index 00000000..64c03c30 --- /dev/null +++ b/status.html @@ -0,0 +1,86 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + {% if not fic.completed and not fic.failure %} + + {% endif %} + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + +
    + +
    + {% if fic.completed %} +

    Your fic has finished processing and you can download it now:

    +

    {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})

    + {% else %} + {% if fic.failure %} + Your fic failed to process. Please check the URL and the error message below.
    +
    + {{ fic.failure }} +
    + {% else %} +

    Not done yet. This page will periodically poll to see if your story has finished.

    + {% endif %} + {% endif %} +
    +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + +
    + + + + diff --git a/utils/remover.py b/utils/remover.py index 327db984..954e151b 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -24,20 +24,19 @@ class Remover(webapp.RequestHandler): theDate = datetime.date.today() - datetime.timedelta(days=2) logging.debug("Will delete stuff older than %s" % theDate) - fics = DownloadedFanfic.all() - fics.order("date") - - results = fics.fetch(50) - - + fics = DownloadMeta.all() + fics.filter("date <",theDate).order("date") + results = fics.fetch(100) logging.debug([x.name for x in results]) - + num = 0 for d in results: -# d.blob = None -# d.cleared = True d.delete() + for c in d.data_chunks: + c.delete() num = num + 1 + logging.debug('Delete '+d.url) + logging.info('Deleted instances: %d' % num) self.response.out.write('Deleted instances: %d' % num) @@ -50,4 +49,4 @@ def main(): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - main() \ No newline at end of file + main() From ef5e7700b911fc89ad8ebc4d06817607318ef6cb Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 10 Mar 2011 13:14:51 -0600 Subject: [PATCH 111/482] Back out some changes I didn't actually mean to commit. Just some custom stuff. --- fanficdownloader/downloader.py | 4 ++-- fanficdownloader/twilighted.py | 4 ++-- fanficdownloader/twipassword.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index dccda983..a8bf6039 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -181,8 +181,8 @@ if __name__ == '__main__': loader = FanficLoader(adapter, writerClass) loader.setStandAlone(True) -# if bookFormat != 'epub': - loader.setOverWrite(True) + if bookFormat != 'epub': + loader.setOverWrite(True) try: diff --git a/fanficdownloader/twilighted.py b/fanficdownloader/twilighted.py index efd8e521..7560834b 100644 --- a/fanficdownloader/twilighted.py +++ b/fanficdownloader/twilighted.py @@ -27,7 +27,7 @@ class Twilighted(FanfictionSiteAdapter): self.path = parsedUrl.path self.opener = u2.build_opener(u2.HTTPCookieProcessor()) self.password=twipassword.password - self.login='BobsClue' + self.login='sigizmund' self.storyDescription = 'Fanfiction Story' self.authorId = '0' self.authorURL = '' @@ -85,7 +85,7 @@ class Twilighted(FanfictionSiteAdapter): return False def requiresLogin(self, url = None): - return False + return True def performLogin(self, url = None): data = {} diff --git a/fanficdownloader/twipassword.py b/fanficdownloader/twipassword.py index 52329a71..105c09c3 100644 --- a/fanficdownloader/twipassword.py +++ b/fanficdownloader/twipassword.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- # This is really for the web version. downalod.py will ask. -password='xeMApMQFfR' +password='somepass' From e6ebd956f872b4429f1b76528066aeaeb2bd3d06 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 10 Mar 2011 14:30:42 -0600 Subject: [PATCH 112/482] Datastore max size is 1,000,000, not 1M(=2^20). 
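The arithmetic behind that subject line, for anyone hitting the same limit: the datastore caps a property at the decimal million, while the chunking code above sliced at 1024*1000, so the largest chunks overshot the cap by 24,000 bytes. A quick interpreter check (the values follow from the definitions; nothing here is from the patch itself):

    >>> 1024 * 1000            # chunk size used by PATCH 110 above
    1024000
    >>> 10 ** 6                # the datastore's per-property cap
    1000000
    >>> 1024 * 1000 - 10 ** 6  # overshoot that made large puts fail
    24000
    >>> 2 ** 20                # a true binary megabyte, for comparison
    1048576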
--- app.yaml | 2 +- main.py | 4 ++-- queue.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app.yaml b/app.yaml index 9c55df49..0b45fd03 100644 --- a/app.yaml +++ b/app.yaml @@ -1,5 +1,5 @@ application: fanfictionloader -version: 2-6-beta +version: 3-0-alpha runtime: python api_version: 1 diff --git a/main.py b/main.py index 85d5f937..5c0563d2 100644 --- a/main.py +++ b/main.py @@ -345,9 +345,9 @@ class FanfictionDownloaderTask(webapp.RequestHandler): while( len(data) > 0 ): DownloadData(download=download, index=index, - blob=data[:1024*1000]).put() + blob=data[:1000000]).put() index += 1 - data = data[1024*1000:] + data = data[1000000:] download.completed=True download.put() diff --git a/queue.yaml b/queue.yaml index 2acafa27..77c4e83b 100644 --- a/queue.yaml +++ b/queue.yaml @@ -4,4 +4,4 @@ queue: - name: download rate: 10/s retry_parameters: - task_retry_limit: 3 + task_retry_limit: 2 From e645c054d8cc64824879b8a6f91ff74736e8d8be Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 15 Mar 2011 14:49:16 -0500 Subject: [PATCH 113/482] adastrafanfic can have chapters with tags () in them. Allow get as well as post. --- fanficdownloader/adastrafanfic.py | 4 +++- fanficdownloader/output.py | 2 ++ main.py | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fanficdownloader/adastrafanfic.py b/fanficdownloader/adastrafanfic.py index 894787f2..9d3186b7 100644 --- a/fanficdownloader/adastrafanfic.py +++ b/fanficdownloader/adastrafanfic.py @@ -113,7 +113,9 @@ class Adastrafanfic(FanfictionSiteAdapter): for o in allOptions: # warning=5 bypasses 'are you old enough' checks. url = self.url + "&warning=5&chapter=%s" % o['value'] - title = o.string + # ad astra can have tags, like in chapter titles. + title = "%s" % o + title = re.sub('<[^>]+>','',title) result.append((url,title)) # warning=5 bypasses 'are you old enough' checks. diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 63c1168c..26e646a1 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -474,6 +474,8 @@ def replaceNotEntities(data): def removeEntities(text): # replace numeric versions of [&<>] with named versions. + if text is None: + return text try: t = text.decode('utf-8') except UnicodeEncodeError, e: diff --git a/main.py b/main.py index 5c0563d2..9bddd146 100644 --- a/main.py +++ b/main.py @@ -171,6 +171,9 @@ class RecentAllFilesServer(webapp.RequestHandler): self.response.out.write(template.render(path, template_values)) class FanfictionDownloader(webapp.RequestHandler): + def get(self): + self.post() + def post(self): logging.getLogger().setLevel(logging.DEBUG) From 8c606e45fd8e9e74a0b64f4022e836aeb68d48b6 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 17 Mar 2011 17:03:25 -0500 Subject: [PATCH 114/482] Remove 2 sec sleep before making live version. --- fanficdownloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index a8bf6039..5d0932d2 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -104,7 +104,7 @@ class FanficLoader: text = self.adapter.getText(u) self.writer.writeChapter(i, n, text) i = i+1 - time.sleep(2) + #time.sleep(2) self.writer.finalise() From 389fc010f2013881c596193eebbb70cc8a24009d Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 22 Mar 2011 12:59:12 -0500 Subject: [PATCH 115/482] After merging QueueProc changes, tweaks clean up cron times and change appspot instance name. 
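One detail from PATCH 113 above deserves a standalone illustration: when an <option> holds nested markup, BeautifulSoup's o.string comes back None, so the adapter renders the whole node to a string and regex-strips the tags instead. A small sketch of that idiom, with invented sample markup (only the select/option parsing mirrors the patch):

    import re
    import BeautifulSoup as bs

    # Invented sample: a chapter <option> whose title contains nested tags.
    html = '<select name="chapter"><option value="2">2. The <i>Q</i> Gambit</option></select>'
    soup = bs.BeautifulStoneSoup(html)
    for o in soup.find('select', {'name': 'chapter'}).findAll('option'):
        title = "%s" % o                      # render the node, tags and all
        title = re.sub('<[^>]+>', '', title)  # then strip every tag
        print title                           # -> 2. The Q Gambit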
--- app.yaml | 2 +- cron.yaml | 2 +- utils/remover.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app.yaml b/app.yaml index 0b45fd03..d6dc562b 100644 --- a/app.yaml +++ b/app.yaml @@ -1,5 +1,5 @@ application: fanfictionloader -version: 3-0-alpha +version: 3-0-prod runtime: python api_version: 1 diff --git a/cron.yaml b/cron.yaml index 1d9c70a0..5d8d1acd 100644 --- a/cron.yaml +++ b/cron.yaml @@ -1,4 +1,4 @@ cron: - description: cleanup job url: /r3m0v3r - schedule: every 3 hours \ No newline at end of file + schedule: every 12 hours diff --git a/utils/remover.py b/utils/remover.py index 954e151b..d81fe85f 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -21,7 +21,7 @@ class Remover(webapp.RequestHandler): logging.debug("Starting r3m0v3r") user = users.get_current_user() logging.debug("Working as user %s" % user) - theDate = datetime.date.today() - datetime.timedelta(days=2) + theDate = datetime.date.today() - datetime.timedelta(days=7) logging.debug("Will delete stuff older than %s" % theDate) fics = DownloadMeta.all() From 1d1ed1ef710efa0c4936e6197894896410147725 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 22 Mar 2011 19:11:17 -0500 Subject: [PATCH 116/482] Add support for www.whofic.com. --- fanficdownloader/downloader.py | 3 + fanficdownloader/whofic.py | 236 +++++++++++++++++++++++++++++++++ index.html | 20 +-- main.py | 2 + 4 files changed, 244 insertions(+), 17 deletions(-) create mode 100644 fanficdownloader/whofic.py diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index 5d0932d2..53e9acec 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -30,6 +30,7 @@ import fictionalley import hpfiction import twilighted import adastrafanfic +import whofic import potionsNsnitches import mediaminer @@ -151,6 +152,8 @@ if __name__ == '__main__': adapter = twilighted.Twilighted(url) elif url.find('adastrafanfic.com') != -1: adapter = adastrafanfic.Adastrafanfic(url) + elif url.find('whofic.com') != -1: + adapter = whofic.Whofic(url) elif url.find('potionsandsnitches.net') != -1: adapter = potionsNsnitches.PotionsNSnitches(url) elif url.find('mediaminer.org') != -1: diff --git a/fanficdownloader/whofic.py b/fanficdownloader/whofic.py new file mode 100644 index 00000000..dbe9ddc7 --- /dev/null +++ b/fanficdownloader/whofic.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- + +import os +import re +import sys +import shutil +import os.path +import urllib as u +import logging +import pprint as pp +import unittest +import urllib2 as u2 +import urlparse as up +import BeautifulSoup as bs +import htmlentitydefs as hdefs +import time +import datetime + +from adapter import * + +class Whofic(FanfictionSiteAdapter): + def __init__(self, url): + self.url = url + parsedUrl = up.urlparse(url) + self.host = parsedUrl.netloc + self.path = parsedUrl.path + self.opener = u2.build_opener(u2.HTTPCookieProcessor()) + self.storyDescription = 'Fanfiction Story' + self.authorId = '0' + self.authorURL = '' + self.storyId = '0' + self.storyPublished = datetime.date(1970, 01, 31) + self.storyCreated = datetime.datetime.now() + self.storyUpdated = datetime.date(1970, 01, 31) + self.languageId = 'en-UK' + self.language = 'English' + self.subjects = [] + self.subjects.append ('fanfiction') + self.subjects.append ('A Teaspoon and an Open Mind') + self.publisher = self.host + self.numChapters = 0 + self.numWords = 0 + self.genre = '' + self.category = '' + self.storyStatus = 'In-Progress' + self.storyRating = 'PG' + 
self.storyUserRating = '0' + self.storyCharacters = [] + self.storySeries = '' + self.outputName = '' + self.outputStorySep = '-whof_' + + self.chapurl = False + ss=self.url.split('?') + logging.debug('ss=%s' % ss) + if ss is not None and len(ss) > 1: + sss = ss[1].replace('&','&').split('&') + logging.debug('sss=%s' % sss) + if sss is not None and len(sss) > 0: + ssss = sss[0].split('=') + logging.debug('ssss=%s' % ssss) + if ssss is not None and len(ssss) > 1 and ssss[0] == 'sid': + self.storyId = ssss[1] + if len(sss) > 1: + ssss = sss[1].split('=') + logging.debug('ssss=%s' % ssss) + if ssss is not None and len(ssss) > 1 and ssss[0] == 'chapter': + self.chapurl = True + + self.url = 'http://' + self.host + self.path + '?sid=' + self.storyId + logging.debug('self.url=%s' % self.url) + + logging.debug("Created Whofic: url=%s" % (self.url)) + + def requiresLogin(self, url = None): + return False + + def extractIndividualUrls(self): + url = self.url + '&chapter=1' + + data = '' + try: + data = self.opener.open(url).read() + except Exception, e: + data = '' + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + if data is None: + raise StoryDoesNotExist("Problem reading story URL " + url + "!") + + soup = None + try: + soup = bs.BeautifulStoneSoup(data) + except: + raise FailedToDownload("Error downloading Story: %s! Problem decoding page!" % url) + + title = soup.find('title').string + title = title.split('::')[1].strip() + logging.debug('Title: %s' % title) + self.storyName = title.split(' by ')[0].strip() + self.authorName = title.split(' by ')[1].strip() + + for a in soup.findAll('a'): + if a['href'].startswith('viewuser.php'): + self.authorId = a['href'].split('=')[1] + self.authorURL = 'http://'+self.host+'/'+a['href'] + + logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) + logging.debug('self.authorId=%s, self.authorName=%s' % (self.authorId, self.authorName)) + + select = soup.find('select', { 'name' : 'chapter' } ) + + result = [] + if select is None: + # no chapters found, try url by itself. + result.append((url,self.storyName)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = self.url + "&chapter=%s" % o['value'] + # just in case there's tags, like in chapter titles. + title = "%s" % o + title = re.sub('<[^>]+>','',title) + result.append((url,title)) + + ## Whofic.com puts none of the meta data in the chapters or + ## even the story chapter index page. Need to scrape the + ## author page to find it. + data = self.opener.open(self.authorURL).read() + + soup = bs.BeautifulStoneSoup(data, selfClosingTags=('br','hr')) + # find this story in the list, parse it's metadata based on + # lots of assumptions, since there's little tagging. + for a in soup.findAll('a'): + #print "a href:"+a['href'] + if a['href'].find('viewstory.php?sid='+self.storyId) != -1: + metadata = a.findParent('td') + metadatachunks = metadata.__str__('utf8').split('
    ') + # process metadata for this story. + #print a.findParent('td').__str__('utf8') + self.storyDescription = metadatachunks[1] + + # for cata in metadata.findAll('a'): + # if cata['href'].startswith('categories.php'): + # if len(self.category) == 0: + # self.category = cata.string + # else: + # self.category = self.category + ", " + cata.string + + # the stuff with ' - ' separators + moremeta = metadatachunks[2] + moremeta = re.sub('<[^>]+>','',moremeta) # strip tags. + print "====== moremeta: "+moremeta + + moremetaparts = moremeta.split(' - ') + + self.category = moremetaparts[0] + for cat in self.category.split(', '): + self.addSubject(cat.strip()) + + self.storyRating = moremetaparts[1] + + for warn in moremetaparts[2].split(', '): + self.addSubject(warn.strip()) + + self.genre = moremetaparts[3] + + # the stuff with ' - ' separators *and* names + moremeta = metadatachunks[5] + moremeta = re.sub('<[^>]+>','',moremeta) # strip tags. + print "====== moremeta 2: "+moremeta + + moremetaparts = moremeta.split(' - ') + + for part in moremetaparts: + (name,value) = part.split(': ') + name=name.strip() + value=value.strip() + if name == 'Published': + self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(value, '%Y.%m.%d'))) + if name == 'Updated': + self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(value, '%Y.%m.%d'))) + if name == 'Completed' and value == 'Yes': + self.storyStatus = name + if name == 'Word Count': + self.numWords = value + + break + + self.numChapters = len(result) + + return result + + def getText(self, url): + if url.find('http://') == -1: + url = 'http://' + self.host + '/' + url + + logging.debug('Getting data from: %s' % url) + + data = '' + try: + data = self.opener.open(url).read() + except Exception, e: + data = '' + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + if data is None: + raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) + + soup = None + try: + # I really wish I knew why adastra needs the selfClosingTags to make
    work, but ficwad doesn't. + soup = bs.BeautifulStoneSoup(data, convertEntities=bs.BeautifulStoneSoup.HTML_ENTITIES, selfClosingTags=('br','hr')) + except: + logging.info("Failed to decode: <%s>" % data) + raise FailedToDownload("Error downloading Chapter: %s! Problem decoding page!" % url) + + # hardly a great identifier, I know, but whofic really doesn't + # give us anything better to work with. + span = soup.find('span', {'style' : 'font-size: 100%;'}) + + if None == span: + raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return span.__str__('utf8') + + +class Whofic_UnitTests(unittest.TestCase): + def setUp(self): + logging.basicConfig(level=logging.DEBUG) + pass + + def testGetUrlsWorks(self): + url = 'http://www.whofic.com/viewstory.php?sid=37139' + self.assertEquals(6, len(Whofic(url).extractIndividualUrls())) + +if __name__ == '__main__': + unittest.main() diff --git a/index.html b/index.html index 2bc40d75..2ceec564 100644 --- a/index.html +++ b/index.html @@ -36,8 +36,6 @@

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier.

For Amazon Kindle, use Mobi output; for Sony Reader, Nook and iPad, use ePub

    -

    To support new features, such as including story summaries, - the URL you need to use for some sites has changed. See below for example URLs for each site.

    Or see your personal list of previously downloaded fanfics.

    @@ -128,6 +126,9 @@
    adastrafanfic.com
    Use the URL of the story's chapter list, such as
    http://www.adastrafanfic.com/viewstory.php?sid=854. +
    whofic.com +
    Use the URL of the story's chapter list, such as +
    http://www.whofic.com/viewstory.php?sid=16334. @@ -141,28 +142,13 @@ Small post written by me — how to read fiction in Stanza or any other ebook reader. -
  • - Currently we support fanfiction.net, fictionpress.com, ficwad.com, fictionalley.org, harrypotterfanfiction.com, potionsandsnitches.net, mediaminer.org and twilighted.net. - fanficauthors.net and tthfanfic.org offer native ePub functionality. -
  • You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
  • -
  • - One-shots, fics with a single chapter, are now supported. -
  • -
  • - You can download fanfics and store them for 'later' by just downloading them and visiting recent - downloads section. -
• Downloaded stories are deleted after some time (which should give you enough time to download them and will keep Google happy about the app not going over the storage limit).
  • -
  • - If Downloader simply opens a download file window rather than saves the fanfic and gives you a link, it means it is - too large to save in the database and you need to download it straight away. -
  • If you see some funny characters in downloaded Plain Text file, make sure you choose text file encoding UTF-8 and not something else. diff --git a/main.py b/main.py index 9bddd146..2bcc8fd0 100644 --- a/main.py +++ b/main.py @@ -275,6 +275,8 @@ class FanfictionDownloaderTask(webapp.RequestHandler): adapter = twilighted.Twilighted(url) elif url.find('adastrafanfic.com') != -1: adapter = adastrafanfic.Adastrafanfic(url) + elif url.find('whofic.com') != -1: + adapter = whofic.Whofic(url) elif url.find('potionsandsnitches.net') != -1: adapter = potionsNsnitches.PotionsNSnitches(url) elif url.find('mediaminer.org') != -1: From 925324bd13886a774d42db83973bce97638eecdb Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 22 Mar 2011 19:17:41 -0500 Subject: [PATCH 117/482] Improved (but not perfected) mobi file output. Reduce cron clean up time to 2 days. --- fanficdownloader/constants.py | 22 +++-- fanficdownloader/mobi.py | 41 ++++---- fanficdownloader/output.py | 170 ++++++++++++++++++++++++++++------ utils/remover.py | 2 +- 4 files changed, 184 insertions(+), 51 deletions(-) diff --git a/fanficdownloader/constants.py b/fanficdownloader/constants.py index bd35546a..0be3038e 100644 --- a/fanficdownloader/constants.py +++ b/fanficdownloader/constants.py @@ -29,17 +29,27 @@ MIMETYPE = '''application/epub+zip''' TITLE_HEADER = ''' %s - %s -

    %s by %s

    +

    %s by %s

    +''' + +TITLE_ENTRY = '''%s %s
    +''' + +TITLE_FOOTER = ''' +
    Summary:
    %s
    + +''' + +TABLE_TITLE_HEADER = TITLE_HEADER + ''' ''' -TITLE_ENTRY = ''' +TABLE_TITLE_ENTRY = ''' ''' -TITLE_FOOTER = '''
    %s%s
    %s%s
    -

    Summary:
    %s

    - -''' +TABLE_TITLE_FOOTER = ''' + +''' + TITLE_FOOTER CONTAINER = ''' diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py index cd9502e0..f9f16ea3 100644 --- a/fanficdownloader/mobi.py +++ b/fanficdownloader/mobi.py @@ -65,44 +65,50 @@ class Converter: return out.getvalue() def ConvertFile(self, html_file, out_file): - self._ConvertStringToFile(open(html_file).read(), - open(out_file, 'w')) + self._ConvertStringToFile(open(html_file,'rb').read(), + open(out_file, 'wb')) def ConvertFiles(self, html_files, out_file): - html_strs = [open(f).read() for f in html_files] - self._ConvertStringsToFile(html_strs, open(out_file, 'w')) + html_strs = [open(f,'rb').read() for f in html_files] + self._ConvertStringsToFile(html_strs, open(out_file, 'wb')) def MakeOneHTML(self, html_strs): """This takes a list of HTML strings and returns a big HTML file with all contents consolidated. It constructs a table of contents and adds anchors within the text """ + title_html = [] toc_html = [] - if self._refresh_url: - toc_html.append('Update Reading List
    ' % - self._refresh_url) body_html = [] - titles = [] - PAGE_BREAK = '' - for pos, html in enumerate(html_strs): + PAGE_BREAK = '' + + # pull out the title page, assumed first html_strs. + htmltitle = html_strs[0] + entrytitle = _SubEntry(1, htmltitle) + title_html.append(entrytitle.Body()) + + toc_html.append(PAGE_BREAK) + toc_html.append('

    Table of Contents


    ') + + for pos, html in enumerate(html_strs[1:]): entry = _SubEntry(pos+1, html) - titles.append(entry.title[:10]) - toc_html.append('%s
    ' % entry.TocLink()) + toc_html.append('%s
    ' % entry.TocLink()) # give some space between bodies of work. body_html.append(PAGE_BREAK) + body_html.append(entry.Anchor()) - body_html.append('

    %s

    ' % entry.title) body_html.append(entry.Body()) # TODO: this title can get way too long with RSS feeds. Not sure how to fix - header = 'Bibliorize %s GMT' % time.ctime( + header = 'Bibliorize %s GMT' % time.ctime( time.time()) footer = '' - all_html = header + '\n'.join(toc_html + body_html) + footer + all_html = header + '\n'.join(title_html + toc_html + body_html) + footer + #print "%s" % all_html.encode('utf8') return all_html def _ConvertStringsToFile(self, html_strs, out_file): @@ -343,5 +349,6 @@ class Header: if __name__ == '__main__': import sys - m = Converter() - m.ConvertFiles(sys.argv[1:], '/tmp/test.mobi') + m = Converter(title='Testing Mobi', author='Mobi Author', publisher='mobi converter') + m.ConvertFiles(sys.argv[1:], 'test.mobi') + #m.ConvertFile(sys.argv[1], 'test.mobi') diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 26e646a1..087fff30 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -83,7 +83,8 @@ class TextWriter(FanficWriter): class MobiWriter(FanficWriter): - body = '' + chapters = [] + files = {} @staticmethod def getFormatName(): @@ -104,6 +105,9 @@ class MobiWriter(FanficWriter): self.mobi = mobi self.inmemory = inmemory + self.files = {} + self.chapters = [] + if not self.inmemory and os.path.exists(self.fileName): os.remove(self.fileName) @@ -122,27 +126,140 @@ class MobiWriter(FanficWriter): except: return text + def _writeFile(self, fileName, data): + #logging.debug('_writeFile(`%s`, data)' % fileName) + if fileName in self.files: + try: + d = data.decode('utf-8') + except UnicodeEncodeError, e: + d = data + + self.files[fileName].write(d) + else: + self.files[fileName] = StringIO.StringIO() + self._writeFile(fileName, data) + + def _getFilesStrings(self): + strings = [] + if "title_page.xhtml" in self.files: + strings.append(self.files["title_page.xhtml"].getvalue()) + del(self.files["title_page.xhtml"]) + + keys = self.files.keys() + keys.sort() + + # Assumed all other files are chapter0000.xhtml. + for fn in keys: + strings.append(self.files[fn].getvalue()) + + return strings + def writeChapter(self, index, title, text): - title = self._printableVersion(title) #title.decode('utf-8') - text = self._printableVersion(text) #text.decode('utf-8') - self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title}) - self.body = self.body + '\n' + text + title = removeEntities(title) + logging.debug("Writing chapter: %s" % title) + #title = self._printableVersion(title) #title.decode('utf-8') + text = removeEntities(text) + #text = self._printableVersion(text) #text.decode('utf-8') + + # BeautifulStoneSoup doesn't have any selfClosingTags by default. + # hr & br needs to be if they're going to work. + # Some stories do use multiple br tags as their section breaks... + self.soup = bs.BeautifulStoneSoup(text, selfClosingTags=('br','hr')) + + allTags = self.soup.findAll(recursive=True) + for t in allTags: + for attr in t._getAttrMap().keys(): + if attr not in acceptable_attributes: + del t[attr] + # these are not acceptable strict XHTML. But we do already have + # CSS classes of the same names defined in constants.py + if t.name in ('u'): + t['class']=t.name + t.name='span' + if t.name in ('center'): + t['class']=t.name + t.name='div' + # removes paired, but empty tags. + if t.string != None and len(t.string.strip()) == 0 : + t.extract() + + text = self.soup.__str__('utf8') + + # ffnet(& maybe others) gives the whole chapter text + # as one line. 
This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) Using Soup's prettify() messes up italics + # and such. Done after soup extract so

    and
    + # tags are normalized. Doing it here seems less evil + # than hacking BeautifulSoup, but it's debatable. + text = text.replace('

    ','

    \n').replace('
    ','
    \n') + + filename="chapter%04d.xhtml" % index + self._writeFile(filename, XHTML_START % (title, title)) + self._writeFile(filename, text) + self._writeFile(filename, XHTML_END) + + #self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title}) + #self.body = self.body + '\n' + text def finalise(self): - html = self.xhtmlTemplate.substitute({'title' : self.storyTitle, 'author' : self.authorName, 'body' : self.body}) - soup = bs.BeautifulSoup(html) - result = soup.__str__('utf8') + logging.debug("Finalising...") -# f = open(self.fileName, 'w') -# f.write(result) -# f.close() + published = self.adapter.getStoryPublished().strftime("%Y-%m-%d") + createda = self.adapter.getStoryCreated().strftime("%Y-%m-%d %H:%M:%S") + created = self.adapter.getStoryCreated().strftime("%Y-%m-%d") + updated = self.adapter.getStoryUpdated().strftime("%Y-%m-%d") + updateyy = self.adapter.getStoryUpdated().strftime("%Y") + updatemm = self.adapter.getStoryUpdated().strftime("%m") + updatedd = self.adapter.getStoryUpdated().strftime("%d") + calibre = self.adapter.getStoryUpdated().strftime("%Y-%m-%dT%H:%M:%S") + + description = self.adapter.getStoryDescription() + if hasattr(description, "text"): + description = description.text + prevalue=description + try: + description = unicode(description) + except: + description=prevalue + + if description is not None and len(description) > 0: + description = description.replace ('\\\'', '\'').replace('\\\"', '\"') + description = removeEntities(description) + else: + description = ' ' - c = mobi.Converter(title=self.storyTitle, author=self.authorName, publisher=self.publisher) - mobidata = c.ConvertString(result) + ### writing content -- title page + titleFilePath = "title_page.xhtml" + self._writeFile(titleFilePath, TITLE_HEADER % (self.authorName, self.storyTitle, self.adapter.getStoryURL(), self.storyTitle, self.adapter.getAuthorURL(), self.authorName)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Category:', self.adapter.getCategory())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Genre:', self.adapter.getGenre())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Status:', self.adapter.getStoryStatus())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Published:', published)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Updated:', updated)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Packaged:', createda)) + tmpstr = self.adapter.getStoryRating() + " / " + self.adapter.getStoryUserRating() + self._writeFile(titleFilePath, TITLE_ENTRY % ('Rating Age/User:', tmpstr)) + tmpstr = unicode(self.adapter.getNumChapters()) + " / " + commaGroups(unicode(self.adapter.getNumWords())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Chapters/Words:', tmpstr)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Author ID:', self.adapter.getAuthorId())) + + self._writeFile(titleFilePath, TITLE_FOOTER % description ) + + + + c = mobi.Converter(title=self.storyTitle, + author=self.authorName, + publisher=self.publisher) + mobidata = c.ConvertStrings(self._getFilesStrings()) self.output.write(mobidata) if not self.inmemory: self.output.close() +# zipdir.toZip(filename, self.directory) + class HTMLWriter(FanficWriter): @@ -205,7 +322,6 @@ class HTMLWriter(FanficWriter): class EPubFanficWriter(FanficWriter): chapters = [] - files = {} @staticmethod @@ -360,22 +476,22 @@ 
class EPubFanficWriter(FanficWriter): ### writing content -- title page titleFilePath = "OEBPS/title_page.xhtml" - self._writeFile(titleFilePath, TITLE_HEADER % (self.authorName, self.storyTitle, self.adapter.getStoryURL(), self.storyTitle, self.adapter.getAuthorURL(), self.authorName)) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Category:', self.adapter.getCategory())) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Genre:', self.adapter.getGenre())) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Status:', self.adapter.getStoryStatus())) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Published:', published)) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Updated:', updated)) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Packaged:', createda)) + self._writeFile(titleFilePath, TABLE_TITLE_HEADER % (self.authorName, self.storyTitle, self.adapter.getStoryURL(), self.storyTitle, self.adapter.getAuthorURL(), self.authorName)) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Category:', self.adapter.getCategory())) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Genre:', self.adapter.getGenre())) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Status:', self.adapter.getStoryStatus())) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Published:', published)) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Updated:', updated)) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Packaged:', createda)) tmpstr = self.adapter.getStoryRating() + " / " + self.adapter.getStoryUserRating() - self._writeFile(titleFilePath, TITLE_ENTRY % ('Rating Age/User:', tmpstr)) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Rating Age/User:', tmpstr)) tmpstr = unicode(self.adapter.getNumChapters()) + " / " + commaGroups(unicode(self.adapter.getNumWords())) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Chapters/Words:', tmpstr)) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) - self._writeFile(titleFilePath, TITLE_ENTRY % ('Author ID:', self.adapter.getAuthorId())) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Chapters/Words:', tmpstr)) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) + self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Author ID:', self.adapter.getAuthorId())) - self._writeFile(titleFilePath, TITLE_FOOTER % description ) + self._writeFile(titleFilePath, TABLE_TITLE_FOOTER % description ) ### writing content -- opf file opfFilePath = "OEBPS/content.opf" diff --git a/utils/remover.py b/utils/remover.py index d81fe85f..954e151b 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -21,7 +21,7 @@ class Remover(webapp.RequestHandler): logging.debug("Starting r3m0v3r") user = users.get_current_user() logging.debug("Working as user %s" % user) - theDate = datetime.date.today() - datetime.timedelta(days=7) + theDate = datetime.date.today() - datetime.timedelta(days=2) logging.debug("Will delete stuff older than %s" % theDate) fics = DownloadMeta.all() From efb521c829caf633252e43d84c31692bd9f104ab Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Tue, 22 Mar 2011 19:45:47 -0500 Subject: [PATCH 118/482] Added tag fanficdownloader-0.5 for changeset 0f08ff79de17 From bd82311d51cb437b0fb26272b6e10d2453b52cf2 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 24 Mar 2011 12:57:38 
-0500 Subject: [PATCH 119/482] Mobi improvements: Mark TOC so reader can find it, don't let reader eat every 4096th char, don't prettify. Remove debug outputs from whofic. Increase fetch size of remover. --- fanficdownloader/html.py | 11 ++++++++--- fanficdownloader/mobi.py | 40 +++++++++++++++++++++++++++++++++----- fanficdownloader/whofic.py | 11 ----------- utils/remover.py | 2 +- 4 files changed, 44 insertions(+), 20 deletions(-) diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py index 2c14a58d..e1ca7db5 100644 --- a/fanficdownloader/html.py +++ b/fanficdownloader/html.py @@ -35,15 +35,20 @@ class HtmlProcessor: with . Stores anchors in self._anchor_references''' self._anchor_references = [] anchor_num = 0 - for anchor in self._soup.findAll('a', href=re.compile('^#')): + # anchor links + anchorlist = self._soup.findAll('a', href=re.compile('^#')) + # treat reference tags like a tags for TOCTOP. + anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#'))) + for anchor in anchorlist: self._anchor_references.append((anchor_num, anchor['href'])) del anchor['href'] anchor['filepos'] = '%.10d' % anchor_num anchor_num += 1 - + def _ReplaceAnchorStubs(self): # TODO: Browsers allow extra whitespace in the href names. - assembled_text = self._soup.prettify() + # use __str__ instead of prettify--it inserts extra spaces. + assembled_text = self._soup.__str__('utf8') del self._soup # shouldn't touch this anymore for anchor_num, original_ref in self._anchor_references: ref = urllib.unquote(original_ref[1:]) # remove leading '#' diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py index f9f16ea3..4748e202 100644 --- a/fanficdownloader/mobi.py +++ b/fanficdownloader/mobi.py @@ -88,8 +88,8 @@ class Converter: entrytitle = _SubEntry(1, htmltitle) title_html.append(entrytitle.Body()) - toc_html.append(PAGE_BREAK) - toc_html.append('

    Table of Contents


    ') + title_html.append(PAGE_BREAK) + toc_html.append('

    Table of Contents


    ') for pos, html in enumerate(html_strs[1:]): entry = _SubEntry(pos+1, html) @@ -103,8 +103,16 @@ class Converter: body_html.append(entry.Body()) # TODO: this title can get way too long with RSS feeds. Not sure how to fix - header = 'Bibliorize %s GMT' % time.ctime( - time.time()) + # cheat slightly and use the
    code to set filepos in references. + header = ''' + +Bibliorize %s GMT + + + + + +''' % time.ctime(time.time()) footer = '' all_html = header + '\n'.join(title_html + toc_html + body_html) + footer @@ -122,6 +130,21 @@ class Converter: def _ConvertStringToFile(self, html_data, out): html = HtmlProcessor(html_data) data = html.CleanHtml() + + # collect offsets of '' tags, use to make index list. + # indexlist = [] # list of (offset,length) tuples. + # not in current use. + + # j=0 + # lastj=0 + # while True: + # j=data.find('',lastj+10) # plus a bit so we find the next. + # if j < 0: + # break + # indexlist.append((lastj,j-lastj)) + # print "index offset: %d length: %d" % (lastj,j-lastj) + # lastj=j + records = [] # title = html.title # if title: @@ -131,6 +154,7 @@ class Converter: end = min(len(data), start_pos + Record.MAX_SIZE) record_data = data[start_pos:end] records.append(self._header.AddRecord(record_data, record_id)) + #print "HTML Record %03d: (size:%d) [[%s ... %s]]" % ( record_id, len(record_data), record_data[:20], record_data[-20:] ) record_id += 1 self._header.SetImageRecordIndex(record_id) records[0:0] = [self._header.MobiHeader()] @@ -139,12 +163,18 @@ class Converter: out.write(header) for record in records: record.WriteHeader(out, rec_offset) - rec_offset += len(record.data) + #print "rec_offset: %d len(record.data): %d" % (rec_offset,len(record.data)) + rec_offset += (len(record.data)+1) # plus one for trailing null # Write to nuls for some reason out.write('\0\0') for record in records: record.WriteData(out) + out.write('\0') + # needs a trailing null, I believe it indicates zero length 'overlap'. + # otherwise, the readers eat the last char of each html record. + # Calibre writes another 6-7 bytes of stuff after that, but we seem + # to be getting along without it. class Record: MAX_SIZE = 4096 diff --git a/fanficdownloader/whofic.py b/fanficdownloader/whofic.py index dbe9ddc7..e7277a16 100644 --- a/fanficdownloader/whofic.py +++ b/fanficdownloader/whofic.py @@ -131,25 +131,15 @@ class Whofic(FanfictionSiteAdapter): # find this story in the list, parse it's metadata based on # lots of assumptions, since there's little tagging. for a in soup.findAll('a'): - #print "a href:"+a['href'] if a['href'].find('viewstory.php?sid='+self.storyId) != -1: metadata = a.findParent('td') metadatachunks = metadata.__str__('utf8').split('
    ') # process metadata for this story. - #print a.findParent('td').__str__('utf8') self.storyDescription = metadatachunks[1] - - # for cata in metadata.findAll('a'): - # if cata['href'].startswith('categories.php'): - # if len(self.category) == 0: - # self.category = cata.string - # else: - # self.category = self.category + ", " + cata.string # the stuff with ' - ' separators moremeta = metadatachunks[2] moremeta = re.sub('<[^>]+>','',moremeta) # strip tags. - print "====== moremeta: "+moremeta moremetaparts = moremeta.split(' - ') @@ -167,7 +157,6 @@ class Whofic(FanfictionSiteAdapter): # the stuff with ' - ' separators *and* names moremeta = metadatachunks[5] moremeta = re.sub('<[^>]+>','',moremeta) # strip tags. - print "====== moremeta 2: "+moremeta moremetaparts = moremeta.split(' - ') diff --git a/utils/remover.py b/utils/remover.py index 954e151b..2c90ed2e 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -26,7 +26,7 @@ class Remover(webapp.RequestHandler): fics = DownloadMeta.all() fics.filter("date <",theDate).order("date") - results = fics.fetch(100) + results = fics.fetch(500) logging.debug([x.name for x in results]) num = 0 From 3e381dd33f34c27c6bb8bb1a91962a609247e3d9 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 24 Mar 2011 22:09:11 -0500 Subject: [PATCH 120/482] Still tweaking the clean up cron. --- cron.yaml | 2 +- utils/remover.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cron.yaml b/cron.yaml index 5d8d1acd..325ad870 100644 --- a/cron.yaml +++ b/cron.yaml @@ -1,4 +1,4 @@ cron: - description: cleanup job url: /r3m0v3r - schedule: every 12 hours + schedule: every 2 hours diff --git a/utils/remover.py b/utils/remover.py index 2c90ed2e..954e151b 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -26,7 +26,7 @@ class Remover(webapp.RequestHandler): fics = DownloadMeta.all() fics.filter("date <",theDate).order("date") - results = fics.fetch(500) + results = fics.fetch(100) logging.debug([x.name for x in results]) num = 0 From 302a8f12a3bb2ee5e71f7e45c9d1b2d7febecce5 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 1 Apr 2011 22:14:05 -0500 Subject: [PATCH 121/482] Removing storage for old version. Fixes for whofic meta data. 
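The whofic metadata these fixes (and PATCH 116 before them) wrestle with is plain ' - '-separated text, so the parse is two splits deep: first on ' - ' into fields, then on ': ' into name/value pairs, with dates read via time.strptime. A standalone sketch of that second stage, using an invented sample line shaped like whofic's author-page footer:

    import time
    import datetime

    # Invented sample; real lines come from scraping the author page.
    moremeta = 'Published: 2011.03.22 - Updated: 2011.04.01 - Completed: Yes - Word Count: 40123'

    fields = {}
    for part in moremeta.split(' - '):
        name, value = part.split(': ')
        fields[name.strip()] = value.strip()

    updated = datetime.datetime.fromtimestamp(
        time.mktime(time.strptime(fields['Updated'], '%Y.%m.%d')))
    print fields['Word Count'], updated.date()  # -> 40123 2011-04-01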
--- app.yaml | 3 +++ fanficdownloader/whofic.py | 4 ++-- ffstorage.py | 20 -------------------- utils/remover.py | 2 +- 4 files changed, 6 insertions(+), 23 deletions(-) diff --git a/app.yaml b/app.yaml index d6dc562b..b1796d40 100644 --- a/app.yaml +++ b/app.yaml @@ -27,3 +27,6 @@ handlers: - url: /.* script: main.py + +builtins: +- datastore_admin: on diff --git a/fanficdownloader/whofic.py b/fanficdownloader/whofic.py index e7277a16..735692f1 100644 --- a/fanficdownloader/whofic.py +++ b/fanficdownloader/whofic.py @@ -35,8 +35,8 @@ class Whofic(FanfictionSiteAdapter): self.languageId = 'en-UK' self.language = 'English' self.subjects = [] - self.subjects.append ('fanfiction') - self.subjects.append ('A Teaspoon and an Open Mind') + self.subjects.append ('Fanfiction') + self.subjects.append ('Doctor Who') self.publisher = self.host self.numChapters = 0 self.numWords = 0 diff --git a/ffstorage.py b/ffstorage.py index df1e6ff9..bb17d8bb 100644 --- a/ffstorage.py +++ b/ffstorage.py @@ -1,25 +1,5 @@ from google.appengine.ext import db -class OneDownload(db.Model): - user = db.UserProperty() - url = db.StringProperty() - format = db.StringProperty() - login = db.StringProperty() - password = db.StringProperty() - failure = db.StringProperty() - date = db.DateTimeProperty(auto_now_add=True) - -class DownloadedFanfic(db.Model): - user = db.UserProperty() - url = db.StringProperty() - name = db.StringProperty() - author = db.StringProperty() - format = db.StringProperty() - date = db.DateTimeProperty(auto_now_add=True) - blob = db.BlobProperty() - mac = db.StringProperty() - cleared = db.BooleanProperty(default=False) - class DownloadMeta(db.Model): user = db.UserProperty() url = db.StringProperty() diff --git a/utils/remover.py b/utils/remover.py index 954e151b..6ce8e300 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -21,7 +21,7 @@ class Remover(webapp.RequestHandler): logging.debug("Starting r3m0v3r") user = users.get_current_user() logging.debug("Working as user %s" % user) - theDate = datetime.date.today() - datetime.timedelta(days=2) + theDate = datetime.date.today() - datetime.timedelta(days=3) logging.debug("Will delete stuff older than %s" % theDate) fics = DownloadMeta.all() From 1f303013bdcac5fa0267b195accb35ff2974c173 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 3 Apr 2011 16:47:08 -0500 Subject: [PATCH 122/482] Fix to allow retrying, compress mobi chunks, Working/Finished/Failed in status.html title, keep 3 days worth. --- main.py | 35 ++++++++++++++++++++++++++++++++--- recent.html | 2 +- status.html | 3 ++- utils/remover.py | 2 +- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 2bcc8fd0..bf8b1238 100644 --- a/main.py +++ b/main.py @@ -17,6 +17,7 @@ import os import sys +import zlib import logging import traceback import StringIO @@ -121,8 +122,19 @@ class FileServer(webapp.RequestHandler): self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' data = DownloadData.all().filter("download =", fanfic).order("index") + # epub, txt and html are all already compressed. 
+ # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if fanfic.format == 'mobi': + def dc(data): + return zlib.decompress(data) + else: + def dc(data): + return data + for datum in data: - self.response.out.write(datum.blob) + self.response.out.write(dc(datum.blob)) class FileStatusServer(webapp.RequestHandler): def get(self): @@ -196,6 +208,7 @@ class FanfictionDownloader(webapp.RequestHandler): else: download = q[0] download.completed=False + download.failure=None for c in download.data_chunks: c.delete() @@ -307,7 +320,11 @@ class FanfictionDownloaderTask(webapp.RequestHandler): else: writerClass = output.TextWriter - loader = FanficLoader(adapter, writerClass, quiet = True, inmemory=True, compress=False) + loader = FanficLoader(adapter, + writerClass, + quiet = True, + inmemory=True, + compress=False) try: data = loader.download() @@ -347,10 +364,22 @@ class FanfictionDownloaderTask(webapp.RequestHandler): download.author = self._printableVersion(adapter.getAuthorName()) download.put() index=0 + + # epub, txt and html are all already compressed. + # Each chunk is compressed individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress. + if format == 'mobi': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data + while( len(data) > 0 ): DownloadData(download=download, index=index, - blob=data[:1000000]).put() + blob=c(data[:1000000])).put() index += 1 data = data[1000000:] download.completed=True diff --git a/recent.html b/recent.html index dbe04fab..cea52742 100644 --- a/recent.html +++ b/recent.html @@ -49,7 +49,7 @@ {% if not fic.completed and not fic.failure %} Request Processing...
    {% endif %} -
    {{ fic.url }} ({{ fic.format }}) + {{ fic.url }}

    {% endfor %} diff --git a/status.html b/status.html index 64c03c30..e3c51f08 100644 --- a/status.html +++ b/status.html @@ -2,7 +2,7 @@ - Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + {% if fic.completed %} Finished {% else %} {% if fic.failure %} Failed {% else %} Working... {% endif %} {% endif %} - Fanfiction Downloader {% if not fic.completed and not fic.failure %} @@ -48,6 +48,7 @@

    Not done yet. This page will periodically poll to see if your story has finished.

    {% endif %} {% endif %} +

    Or see your personal list of previously downloaded fanfics.

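This patch (122) and its follow-up, patch 123 just below, settle how mobi output fits App Engine's roughly 1MB-per-entity blob limit: slice the generated file into 1,000,000-byte chunks and deflate each slice on its own, so a download can be served by inflating chunk by chunk instead of holding the whole file in memory just for the compress/uncompress round trip. A minimal self-contained sketch of that scheme (the helper names and the constant are illustrative, not the app's actual API):

    import zlib

    CHUNK_SIZE = 1000000  # the 1MB slice size the patch uses for DownloadData blobs

    def compress_chunks(data, chunk_size=CHUNK_SIZE):
        # slice first, then deflate each slice independently
        chunks = []
        while len(data) > 0:
            chunks.append(zlib.compress(data[:chunk_size]))
            data = data[chunk_size:]
        return chunks

    def decompress_chunk(blob):
        # patch 123's fallback: blobs stored before compression was
        # introduced are not zlib streams, so serve them as-is
        try:
            return zlib.decompress(blob)
        except zlib.error:
            return blob

epub, txt and html chunks are stored raw because, as the in-line comments note, those formats are already compressed.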
diff --git a/utils/remover.py index 6ce8e300..954e151b 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -21,7 +21,7 @@ class Remover(webapp.RequestHandler): logging.debug("Starting r3m0v3r") user = users.get_current_user() logging.debug("Working as user %s" % user) - theDate = datetime.date.today() - datetime.timedelta(days=3) + theDate = datetime.date.today() - datetime.timedelta(days=2) logging.debug("Will delete stuff older than %s" % theDate) fics = DownloadMeta.all() From b058477b97fa8d6cd616249bd678ee9130ce5f1e Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 3 Apr 2011 16:55:51 -0500 Subject: [PATCH 123/482] Fix so recent, but pre-compress mobis will still download. --- main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index bf8b1238..42db5597 100644 --- a/main.py +++ b/main.py @@ -128,7 +128,11 @@ class FileServer(webapp.RequestHandler): # compress/uncompress if fanfic.format == 'mobi': def dc(data): - return zlib.decompress(data) + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compressing. + except zlib.error: + return data else: def dc(data): return data From 1119ba52a01744774d3665b281745bb4a7bee579 Mon Sep 17 00:00:00 2001 From: sigizmund Date: Mon, 18 Apr 2011 11:07:56 +0100 Subject: [PATCH 124/482] Replaced ad block --- index.html | 44 +++++++++++++++++++++++--------------------- recent.html | 49 ++++++++++++++++++++++--------------------------- status.html | 43 +++++++++++++++++++++++-------------------- 3 files changed, 68 insertions(+), 68 deletions(-) diff --git a/index.html b/index.html index 2ceec564..ae4d1aad 100644 --- a/index.html +++ b/index.html @@ -11,19 +11,19 @@

    FanFiction Downloader

    - +
    - - + +
    {{yourfile}} @@ -185,14 +185,16 @@
    - - + + diff --git a/recent.html b/recent.html index cea52742..b0ff3009 100644 --- a/recent.html +++ b/recent.html @@ -11,21 +11,17 @@ FanFiction Downloader - - - - - - + + {{yourfile}} @@ -55,18 +51,17 @@ {% endfor %} - - - + + - + +
    @@ -74,14 +75,16 @@
    - - + + From 13b96946d11cf4df070a5ae293bee0361a599f7e Mon Sep 17 00:00:00 2001 From: sigizmund Date: Tue, 19 Apr 2011 12:49:37 +0100 Subject: [PATCH 125/482] Improving logging, can dump the data which was downloaded if download fails. --- fanficdownloader/ffnet.py | 11 +-- status.html | 158 +++++++++++++++++--------------------- 2 files changed, 78 insertions(+), 91 deletions(-) diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index 8925f517..7b51a48b 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -285,14 +285,15 @@ class FFNet(FanfictionSiteAdapter): except Exception, e: data = '' logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + logging.error("Data downloaded: <%s>" % data) if data is None: - raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) - + raise FailedToDownload("Error downloading Chapter: <%s>! Problem getting page!" % url) + lines = data.split('\n') - + textbuf = '' emit = False - + olddata = data try: data = data.decode('utf8') @@ -309,7 +310,7 @@ class FFNet(FanfictionSiteAdapter): div = soup.find('div', {'id' : 'storytext'}) if None == div: logging.debug(data) - raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + raise FailedToDownload("Error downloading Chapter: <%s>! Missing required element!" % url) return div.__str__('utf8') diff --git a/status.html b/status.html index fa56e66e..a5dfbf30 100644 --- a/status.html +++ b/status.html @@ -1,90 +1,76 @@ - - - {% if fic.completed %} Finished {% else %} {% if fic.failure %} Failed {% else %} Working... {% endif %} {% endif %} - Fanfiction Downloader - - - {% if not fic.completed and not fic.failure %} - - {% endif %} - - -
    -

    - FanFiction Downloader -

    - - -
    - - -
    - -
    - -
    - {% if fic.completed %} -

    Your fic has finished processing and you can download it now:

    -

    {{ fic.title }} - by {{ fic.author }} ({{ fic.format }})

    - {% else %} - {% if fic.failure %} - Your fic failed to process. Please check the URL and the error message below.
    -
    - {{ fic.failure }} -
    - {% else %} -

    Not done yet. This page will periodically poll to see if your story has finished.

    - {% endif %} - {% endif %} -

    Or see your personal list of previously downloaded fanfics.

    -
    -
    -
    - Powered by Google App Engine -

    - FanfictionLoader is a web front-end to fanficdownloader
    - Copyright © Roman Kirillov -
    - -
    - - + -
    - -
    - - - + + + +
    + +
    + {% if fic.completed %} +

    Your fic has finished processing and you can download it now:

    +

    {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})

    + {% else %} + {% if fic.failure %} + Your fic failed to process. Please check the URL and the error message below.
    +
    + {{ fic.failure }} +
    + {% else %} +

    Not done yet. This page will periodically poll to see if your story has finished.

    + {% endif %} + {% endif %} +

    Or see your personal list of previously downloaded fanfics.

    +
    +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + + From 7c5aaa36d552d6566b1d7c331e231a0bf530c30b Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 20 Apr 2011 17:32:30 -0500 Subject: [PATCH 126/482] Save recent downloads longer(5 days, was 2). Fix some redirects. Reduces log errors and makes auto-re-login more transparent. --- main.py | 10 +++++++--- utils/remover.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 42db5597..f3964261 100644 --- a/main.py +++ b/main.py @@ -47,6 +47,7 @@ class LoginRequired(webapp.RequestHandler): user = users.get_current_user() if user: self.redirect('/') + return else: logging.debug(users.create_login_url('/')) url = users.create_login_url(self.request.uri) @@ -96,6 +97,7 @@ class FileServer(webapp.RequestHandler): if fileId == None or len(fileId) < 3: self.redirect('/') + return key = db.Key(fileId) fanfic = db.get(key) @@ -145,7 +147,8 @@ class FileStatusServer(webapp.RequestHandler): logging.info("Status id: %s" % id) user = users.get_current_user() if not user: - self.redirect('/login') + self.redirect(users.create_login_url(self.request.uri)) + return fileId = self.request.get('id') @@ -165,7 +168,8 @@ class RecentFilesServer(webapp.RequestHandler): def get(self): user = users.get_current_user() if not user: - self.redirect('/login') + self.redirect(users.create_login_url(self.request.uri)) + return q = DownloadMeta.all() q.filter('user =', user).order('-date') @@ -195,7 +199,7 @@ class FanfictionDownloader(webapp.RequestHandler): user = users.get_current_user() if not user: - self.redirect(users.create_login_url('/')) + self.redirect(users.create_login_url(self.request.uri)) return format = self.request.get('format') diff --git a/utils/remover.py b/utils/remover.py index 954e151b..d9aa8249 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -21,7 +21,7 @@ class Remover(webapp.RequestHandler): logging.debug("Starting r3m0v3r") user = users.get_current_user() logging.debug("Working as user %s" % user) - theDate = datetime.date.today() - datetime.timedelta(days=2) + theDate = datetime.date.today() - datetime.timedelta(days=5) logging.debug("Will delete stuff older than %s" % theDate) fics = DownloadMeta.all() From 1c2c1a5d6225e27c676c9a150bdb72fb50d8360c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 20 Apr 2011 22:34:59 -0500 Subject: [PATCH 127/482] Add support for twiwrite.net. Requires User/Pass like twilighted.net. 
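A side note on patch 126 just above, before the twiwrite diffs: webapp's redirect() only queues up the 302 response, it does not end the handler, so each fixed call site now pairs self.redirect(...) with an immediate return. Without the return, the rest of the handler kept executing with user == None, which is presumably the source of the log errors the subject mentions; redirecting to create_login_url(self.request.uri) rather than '/login' is what brings the user straight back to the page they asked for. A minimal sketch of the shape the handlers move to (the handler name and body are illustrative):

    from google.appengine.api import users
    from google.appengine.ext import webapp

    class LoginGuardedHandler(webapp.RequestHandler):
        def get(self):
            user = users.get_current_user()
            if not user:
                # redirect() just sets the response; return here, or the
                # code below still runs for the anonymous request.
                self.redirect(users.create_login_url(self.request.uri))
                return
            self.response.out.write('hello %s' % user.nickname())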
--- app.yaml | 2 +- fanficdownloader/downloader.py | 11 +- fanficdownloader/twipassword.py | 3 +- fanficdownloader/twiwrite.py | 280 ++++++++++++++++++++++++++++++++ index.html | 5 +- main.py | 2 + 6 files changed, 298 insertions(+), 5 deletions(-) create mode 100644 fanficdownloader/twiwrite.py diff --git a/app.yaml b/app.yaml index b1796d40..371bf6ce 100644 --- a/app.yaml +++ b/app.yaml @@ -1,5 +1,5 @@ application: fanfictionloader -version: 3-0-prod +version: 3-0-1 runtime: python api_version: 1 diff --git a/fanficdownloader/downloader.py b/fanficdownloader/downloader.py index 53e9acec..6562fb6d 100644 --- a/fanficdownloader/downloader.py +++ b/fanficdownloader/downloader.py @@ -29,6 +29,7 @@ import ficwad import fictionalley import hpfiction import twilighted +import twiwrite import adastrafanfic import whofic import potionsNsnitches @@ -96,7 +97,10 @@ class FanficLoader: else: logging.debug("Do not check for existance of archive file.") - self.writer = self.writerClass(self.booksDirectory, self.adapter, inmemory=self.inmemory, compress=self.compress) + self.writer = self.writerClass(self.booksDirectory, + self.adapter, + inmemory=self.inmemory, + compress=self.compress) i = 1 for u,n in urls: @@ -150,6 +154,8 @@ if __name__ == '__main__': adapter = hpfiction.HPFiction(url) elif url.find('twilighted.net') != -1: adapter = twilighted.Twilighted(url) + elif url.find('twiwrite.net') != -1: + adapter = twiwrite.Twiwrite(url) elif url.find('adastrafanfic.com') != -1: adapter = adastrafanfic.Adastrafanfic(url) elif url.find('whofic.com') != -1: @@ -182,7 +188,8 @@ if __name__ == '__main__': adapter.setPassword(password) - loader = FanficLoader(adapter, writerClass) + loader = FanficLoader(adapter, + writerClass) loader.setStandAlone(True) if bookFormat != 'epub': loader.setOverWrite(True) diff --git a/fanficdownloader/twipassword.py b/fanficdownloader/twipassword.py index 105c09c3..ef1ac2f0 100644 --- a/fanficdownloader/twipassword.py +++ b/fanficdownloader/twipassword.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- -# This is really for the web version. downalod.py will ask. +# This is really for the web version. download.py will ask. 
password='somepass' +twiwritepassword='otherpass' diff --git a/fanficdownloader/twiwrite.py b/fanficdownloader/twiwrite.py new file mode 100644 index 00000000..3e10954b --- /dev/null +++ b/fanficdownloader/twiwrite.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- + +import os +import re +import sys +import shutil +import os.path +import urllib as u +import logging +import pprint as pp +import unittest +import urllib2 as u2 +import urlparse as up +import BeautifulSoup as bs +import htmlentitydefs as hdefs +import time +import datetime + +from adapter import * +import twipassword + +class Twiwrite(FanfictionSiteAdapter): + def __init__(self, url): + self.url = url + parsedUrl = up.urlparse(url) + self.host = parsedUrl.netloc + self.path = parsedUrl.path + self.opener = u2.build_opener(u2.HTTPCookieProcessor()) + self.password=twipassword.twiwritepassword + self.login='BobsClue' + self.storyDescription = 'Fanfiction Story' + self.authorId = '0' + self.authorURL = '' + self.storyId = '0' + self.storyPublished = datetime.date(1970, 01, 31) + self.storyCreated = datetime.datetime.now() + self.storyUpdated = datetime.date(1970, 01, 31) + self.languageId = 'en-UK' + self.language = 'English' + self.subjects = [] + self.subjects.append ('fanfiction') + self.subjects.append ('Twiwrite') + self.publisher = self.host + self.numChapters = 0 + self.numWords = 0 + self.genre = '' + self.category = 'Fanfiction' + self.storyStatus = 'Unknown' + self.storyRating = 'Unknown' + self.storyUserRating = '0' + self.storyCharacters = [] + self.storySeries = '' + self.outputName = '' + self.outputStorySep = '-twrt_' + + self.chapurl = False + ss=self.url.split('?') + logging.debug('ss=%s' % ss) + if ss is not None and len(ss) > 1: + sss = ss[1].replace('&','&').split('&') + logging.debug('sss=%s' % sss) + if sss is not None and len(sss) > 0: + ssss = sss[0].split('=') + logging.debug('ssss=%s' % ssss) + if ssss is not None and len(ssss) > 1 and ssss[0] == 'sid': + self.storyId = ssss[1] + if len(sss) > 1: + ssss = sss[1].split('=') + logging.debug('ssss=%s' % ssss) + if ssss is not None and len(ssss) > 1 and ssss[0] == 'chapter': + self.chapurl = True + + self.url = 'http://' + self.host + self.path + '?sid=' + self.storyId + logging.debug('self.url=%s' % self.url) + + logging.debug("Created Twiwrite: url=%s" % (self.url)) + + def _getLoginScript(self): + return '/user.php?action=login' + + def reqLoginData(self, data): + if data.find('Registered Users Only') != -1 or data.find('There is no such account on our website') != -1: + return True + else: + return False + + def requiresLogin(self, url = None): + return False + + def performLogin(self, url = None): + data = {} + + data['penname'] = self.login + data['password'] = self.password + data['cookiecheck'] = '1' + data['submit'] = 'Submit' + + urlvals = u.urlencode(data) + loginUrl = 'http://' + self.host + self._getLoginScript() + logging.debug("Will now login to URL %s" % loginUrl) + + req = self.opener.open(loginUrl, urlvals) + + d = req.read().decode('utf-8') + + if self.reqLoginData(d) : + return False + else: + return True + + def extractIndividualUrls(self): + url = self.url + '&chapter=1&ageconsent=ok&warning=1' + + data = '' + try: + data = self.opener.open(url).read() + except Exception, e: + data = '' + logging.error("Caught an exception reading URL " + url + ". 
Exception " + unicode(e) + ".") + if data is None: + raise StoryDoesNotExist("Problem reading story URL " + url + "!") + + if self.reqLoginData(data): + self.performLogin() + + data = '' + try: + data = self.opener.open(url).read() + except Exception, e: + data = '' + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + if data is None: + raise StoryDoesNotExist("Problem reading story URL " + url + "!") + + if self.reqLoginData(data): + raise FailedToDownload("Error downloading Story: %s! Login Failed!" % url) + + soup = None + try: + soup = bs.BeautifulStoneSoup(data) + except: + raise FailedToDownload("Error downloading Story: %s! Problem decoding page!" % url) + + # + + div = soup.find('div',{'id':'pagetitle'}) + titlea = div.find('a', href=re.compile(r"viewstory.php")) + self.storyName = titlea.string + + authora = div.find('a', href=re.compile(r"viewuser.php")) + self.authorName = authora.string + self.authorId= authora['href'].split('=')[1] + self.authorURL = 'http://'+self.host+'/'+authora['href'] + + logging.debug('self.storyId=%s, self.storyName=%s' % (self.storyId, self.storyName)) + logging.debug('self.authorId=%s, self.authorName=%s' % (self.authorId, self.authorName)) + + select = soup.find('select', { 'name' : 'chapter' } ) + + result = [] + if select is None: + # no chapters found, try url by itself. + result.append((self.url,self.storyName)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = self.url + "&chapter=%s&ageconsent=ok&warning=1" % o['value'] + title = o.string + result.append((url,title)) + + url = self.url + "&index=1&ageconsent=ok&warning=1" + data = self.opener.open(url).read() + lines = data.split('\n') + soup = bs.BeautifulStoneSoup(data) + + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Rated' in label: + self.storyRating = value.strip() + + if 'Chapters' in label: + self.numChapters = value.strip() + + if 'Word count' in label: + self.numWords = value.strip() + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + self.category = ', '.join(catstext) + for cat in catstext: + self.addSubject(cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.addSubject(genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.storyStatus = 'Completed' + else: + self.storyStatus = 'In-Progress' + + if 'Published' in label: + self.storyPublished = datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y"))) + + if 'Updated' in label: + # there's a stray [ at the end. + value = value[0:-1] + self.storyUpdated = datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y"))) + + # the only things in
<p> tags in <div class='content'>
    are the parts of the summary. + divcontent = soup.find('div',{'class':'content'}) + + # metadesc = soup.find('meta',{'name':'description'}) + # contentsoup = bs.BeautifulStoneSoup(metadesc['content']) + ps = divcontent.findAll('p') + pstext=[] + for p in ps: + if p.string: + s = p.string.replace(' ',' ').strip() + if s: + pstext.append(p.string) + + self.storyDescription = ' '.join(pstext) + print "self.storyDescription: %s"%self.storyDescription + + return result + + def getText(self, url): + if url.find('http://') == -1: + url = 'http://' + self.host + '/' + url + + logging.debug('Getting data from: %s' % url) + + data = '' + try: + data = self.opener.open(url).read() + except Exception, e: + data = '' + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + if data is None: + raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) + + soup = None + try: + soup = bs.BeautifulStoneSoup(data, convertEntities=bs.BeautifulStoneSoup.HTML_ENTITIES) + except: + logging.info("Failed to decode: <%s>" % data) + raise FailedToDownload("Error downloading Chapter: %s! Problem decoding page!" % url) + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return div.__str__('utf8') + + +class Twiwrite_UnitTests(unittest.TestCase): + def setUp(self): + logging.basicConfig(level=logging.DEBUG) + pass + + def testLoginWorks(self): + url = 'http://www.twiwrite.net/viewstory.php?sid=117' + self.assertTrue(Twiwrite(url).performLogin()) + + def testGetUrlsWorks(self): + url = 'http://www.twiwrite.net/viewstory.php?sid=117' + self.assertEquals(36, len(Twiwrite(url).extractIndividualUrls())) + +if __name__ == '__main__': + unittest.main() diff --git a/index.html b/index.html index ae4d1aad..d599c869 100644 --- a/index.html +++ b/index.html @@ -62,7 +62,7 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> to provide your credentials to download it, otherwise just leave it empty. Currently only needed - by twilighted.net. + by twilighted.net and twiwrite.net.
    Login
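A side note on the Twiwrite constructor above: it digs the sid out of the URL (and detects a chapter parameter) with nested split('&') and split('=') passes. The standard library performs the same extraction in one call; a sketch against the same viewstory.php?sid=NNN URLs (the function name is invented for illustration):

    try:
        from urlparse import urlparse, parse_qs   # Python 2, as this codebase uses
    except ImportError:
        from urllib.parse import urlparse, parse_qs  # Python 3

    def story_id(url, default='0'):
        # e.g. http://www.twiwrite.net/viewstory.php?sid=117 -> '117'
        params = parse_qs(urlparse(url).query)
        return params.get('sid', [default])[0]

One wrinkle the hand-rolled version appears to handle that this sketch does not: it first unescapes HTML-escaped ampersands in the query string, so a production version would want the same normalization before parsing.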
    @@ -109,6 +109,9 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
    twilighted.net
    Use the URL of the start of the story, such as
    http://twilighted.net/viewstory.php?sid=8422. +
    twiwrite.net +
    Use the URL of the start of the story, such as +
    http://twiwrite.net/viewstory.php?sid=427.
    ficwad.com
    Use the URL of any story chapter, such as
    http://www.ficwad.com/story/75246. diff --git a/main.py b/main.py index f3964261..e4d603f0 100644 --- a/main.py +++ b/main.py @@ -294,6 +294,8 @@ class FanfictionDownloaderTask(webapp.RequestHandler): adapter = hpfiction.HPFiction(url) elif url.find('twilighted.net') != -1: adapter = twilighted.Twilighted(url) + elif url.find('twiwrite.net') != -1: + adapter = twiwrite.Twiwrite(url) elif url.find('adastrafanfic.com') != -1: adapter = adastrafanfic.Adastrafanfic(url) elif url.find('whofic.com') != -1: From 666e9666cd35ea96168176afee25beb3b90ea9db Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 22 Apr 2011 17:03:43 -0500 Subject: [PATCH 128/482] Attempting to help with ffnet failures. Increase fetch deadline, add retries with backoff, 1/2 sleep. Also remove <> from error message--HTML eats it. --- fanficdownloader/adapter.py | 2 +- fanficdownloader/ffnet.py | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fanficdownloader/adapter.py b/fanficdownloader/adapter.py index f4dd116a..16b4adb5 100644 --- a/fanficdownloader/adapter.py +++ b/fanficdownloader/adapter.py @@ -76,7 +76,7 @@ class FanfictionSiteAdapter: if not self.appEngine: return self.opener.open(url).read().decode('utf-8') else: - return googlefetch(url).content + return googlefetch(url,deadline=10).content def requiresLogin(self, url = None): return False diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index 7b51a48b..47679516 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -277,17 +277,22 @@ class FFNet(FanfictionSiteAdapter): return urls def getText(self, url): - # time.sleep( 2.0 ) - data = '' - try: - logging.debug("Fetching URL: %s" % url) - data = self.fetchUrl(url) - except Exception, e: - data = '' - logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") - logging.error("Data downloaded: <%s>" % data) + data = None + + # try up to three times, with longer sleeps first. + for sleeptime in [0.5, 4, 9]: + time.sleep(sleeptime) + try: + logging.debug("Fetching URL: %s sleeptime: %f" % (url, sleeptime)) + data = self.fetchUrl(url) + if data is not None: + break + except Exception, e: + logging.error("Caught an exception reading URL " + url + ". Exception " + unicode(e) + ".") + logging.error("Data downloaded: <%s>" % data) + if data is None: - raise FailedToDownload("Error downloading Chapter: <%s>! Problem getting page!" % url) + raise FailedToDownload("Error downloading Chapter: %s! Problem getting page!" % url) lines = data.split('\n') @@ -310,7 +315,7 @@ class FFNet(FanfictionSiteAdapter): div = soup.find('div', {'id' : 'storytext'}) if None == div: logging.debug(data) - raise FailedToDownload("Error downloading Chapter: <%s>! Missing required element!" % url) + raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) return div.__str__('utf8') From 1426626d0a4a9919c3c4e4f14a845b31e301ad12 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 25 Apr 2011 19:57:40 -0500 Subject: [PATCH 129/482] Strip leading newline so it doesn't (falsely) show up in appengine logs as error. --- fanficdownloader/whofic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/whofic.py b/fanficdownloader/whofic.py index 735692f1..79fec927 100644 --- a/fanficdownloader/whofic.py +++ b/fanficdownloader/whofic.py @@ -135,7 +135,7 @@ class Whofic(FanfictionSiteAdapter): metadata = a.findParent('td') metadatachunks = metadata.__str__('utf8').split('
    ') # process metadata for this story. - self.storyDescription = metadatachunks[1] + self.storyDescription = metadatachunks[1].strip() # the stuff with ' - ' separators moremeta = metadatachunks[2] From 7420d2765216ec44d75ca1264dcb5ac6b550f3c7 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 26 Apr 2011 17:22:51 -0500 Subject: [PATCH 130/482] More log clean up, better handling of bad URL in ficwad, missing story in ffnet. --- fanficdownloader/adapter.py | 4 +++- fanficdownloader/ffnet.py | 5 ++++- fanficdownloader/fictionalley.py | 4 ++-- fanficdownloader/ficwad.py | 3 ++- fanficdownloader/fpcom.py | 2 +- fanficdownloader/hpfiction.py | 2 +- fanficdownloader/mediaminer.py | 2 +- fanficdownloader/twilighted.py | 2 +- 8 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fanficdownloader/adapter.py b/fanficdownloader/adapter.py index 16b4adb5..cd029e94 100644 --- a/fanficdownloader/adapter.py +++ b/fanficdownloader/adapter.py @@ -141,7 +141,9 @@ class FanfictionSiteAdapter: return self.storyName def getStoryDescription(self): - logging.debug('self.storyDescription=%s' % self.storyDescription) + ## with out stripping \n's, appengine treats additional lines from this debug + ## output as error messages. + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) return self.storyDescription def getStoryCreated(self): diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index 47679516..f497705a 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -257,7 +257,7 @@ class FFNet(FanfictionSiteAdapter): self.storyDescription = self.storyDescription + '&' + ss else: self.storyDescription = ss - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace('\n',' ').replace('\r','')) elif l.find("var datep") != -1: dateps = self._getVarValue (l) self.storyPublished = datetime.datetime(*time.strptime ( dateps, "'%m-%d-%y'" )[0:5]) @@ -314,6 +314,9 @@ class FFNet(FanfictionSiteAdapter): div = soup.find('div', {'id' : 'storytext'}) if None == div: + if "Story Not Found" in data: + logging.info("Story not Found at %s" % url) + raise FailedToDownload("Story not Found at %s" % url) logging.debug(data) raise FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) diff --git a/fanficdownloader/fictionalley.py b/fanficdownloader/fictionalley.py index b1a32125..08eedaaa 100644 --- a/fanficdownloader/fictionalley.py +++ b/fanficdownloader/fictionalley.py @@ -120,7 +120,7 @@ class FictionAlley(FanfictionSiteAdapter): logging.debug('self.storyCharacters=%s' % self.storyCharacters) elif keystr == 'Summary:': self.storyDescription = valstr - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) def extractIndividualUrls(self): @@ -208,7 +208,7 @@ class FictionAlley(FanfictionSiteAdapter): self.storyRating = ss1[1] logging.debug('self.storyRating=%s' % self.storyRating) self.storyDescription = unicode(ss[1]).replace("
    ","").replace("
    ","").replace('\n','') - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) for li in links: a = li.find('a', {'class' : 'chapterlink'}) diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index 2f8fcdf4..68c1344d 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -107,7 +107,7 @@ class FicWad(FanfictionSiteAdapter): description = soup.find('blockquote', {'class' : 'summary'}) if description is not None: self.storyDescription = unicode(description.p.string) - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace('\n',' ').replace('\r','')) meta = soup.find('p', {'class' : 'meta'}) if meta is not None: @@ -175,6 +175,7 @@ class FicWad(FanfictionSiteAdapter): allBlocked = storylist.findAll('li', {'class' : 'blocked'}) if allBlocked is not None: #logging.debug('allBlocked=%s' % allBlocked) + raise FailedToDownload("Are you sure %s is a chapter URL(not the chapter list)?"%cururl) raise LoginRequiredException(cururl) allH4s = storylist.findAll('h4') diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index 7806af5c..bf020c0b 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -155,7 +155,7 @@ class FPCom(FanfictionSiteAdapter): for meta in metas: if 'content' in meta._getAttrMap(): self.storyDescription = unicode(meta['content']) - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) title=meta.find('title') logging.debug('title=%s' % title.string) diff --git a/fanficdownloader/hpfiction.py b/fanficdownloader/hpfiction.py index aeda7d36..ba265e12 100644 --- a/fanficdownloader/hpfiction.py +++ b/fanficdownloader/hpfiction.py @@ -208,7 +208,7 @@ class HPFiction(FanfictionSiteAdapter): else: ii = ii + 1 self.storyDescription = sss - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) urls = [] diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py index 660d7bd0..81911042 100644 --- a/fanficdownloader/mediaminer.py +++ b/fanficdownloader/mediaminer.py @@ -216,7 +216,7 @@ class MediaMiner(FanfictionSiteAdapter): pass elif ssbt == 'Summary:': self.storyDescription = sst.strip() - logging.debug('self.storyDescription=%s' % self.storyDescription) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) elif ssbt == 'Latest Revision:' or ssbt == 'Uploaded On:': #logging.debug('sst=%s' % sst) ssts = sst.split(' ') diff --git a/fanficdownloader/twilighted.py b/fanficdownloader/twilighted.py index 7560834b..6517ab14 100644 --- a/fanficdownloader/twilighted.py +++ b/fanficdownloader/twilighted.py @@ -176,7 +176,7 @@ class Twilighted(FanfictionSiteAdapter): ps = s1.findAll('p') if len(ps) > 0: self.storyDescription = ps[0] - logging.debug('self.storyDescription=%s' % (self.storyDescription)) + logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) else: divs = meta.findAll('div') #logging.debug('Divs: %s' % divs) From bdaea798753078fa85a5999358c4a2fc4cd13a28 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 2 May 2011 13:12:16 -0500 Subject: [PATCH 131/482] Previous change to elim 
newlines in debug sometimes output breaks download. Cheap fix, comment out debugs. --- fanficdownloader/adapter.py | 2 +- fanficdownloader/ffnet.py | 2 +- fanficdownloader/fictionalley.py | 4 ++-- fanficdownloader/ficwad.py | 2 +- fanficdownloader/fpcom.py | 2 +- fanficdownloader/hpfiction.py | 2 +- fanficdownloader/mediaminer.py | 2 +- fanficdownloader/twilighted.py | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fanficdownloader/adapter.py b/fanficdownloader/adapter.py index cd029e94..48e8facd 100644 --- a/fanficdownloader/adapter.py +++ b/fanficdownloader/adapter.py @@ -143,7 +143,7 @@ class FanfictionSiteAdapter: def getStoryDescription(self): ## with out stripping \n's, appengine treats additional lines from this debug ## output as error messages. - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) return self.storyDescription def getStoryCreated(self): diff --git a/fanficdownloader/ffnet.py b/fanficdownloader/ffnet.py index f497705a..4c43cf36 100644 --- a/fanficdownloader/ffnet.py +++ b/fanficdownloader/ffnet.py @@ -257,7 +257,7 @@ class FFNet(FanfictionSiteAdapter): self.storyDescription = self.storyDescription + '&' + ss else: self.storyDescription = ss - logging.debug('self.storyDescription=%s' % self.storyDescription.replace('\n',' ').replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace('\n',' ').replace('\r','')) elif l.find("var datep") != -1: dateps = self._getVarValue (l) self.storyPublished = datetime.datetime(*time.strptime ( dateps, "'%m-%d-%y'" )[0:5]) diff --git a/fanficdownloader/fictionalley.py b/fanficdownloader/fictionalley.py index 08eedaaa..889674d9 100644 --- a/fanficdownloader/fictionalley.py +++ b/fanficdownloader/fictionalley.py @@ -120,7 +120,7 @@ class FictionAlley(FanfictionSiteAdapter): logging.debug('self.storyCharacters=%s' % self.storyCharacters) elif keystr == 'Summary:': self.storyDescription = valstr - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) def extractIndividualUrls(self): @@ -208,7 +208,7 @@ class FictionAlley(FanfictionSiteAdapter): self.storyRating = ss1[1] logging.debug('self.storyRating=%s' % self.storyRating) self.storyDescription = unicode(ss[1]).replace("
    ","").replace("
    ","").replace('\n','') - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) for li in links: a = li.find('a', {'class' : 'chapterlink'}) diff --git a/fanficdownloader/ficwad.py b/fanficdownloader/ficwad.py index 68c1344d..f715f3b1 100644 --- a/fanficdownloader/ficwad.py +++ b/fanficdownloader/ficwad.py @@ -107,7 +107,7 @@ class FicWad(FanfictionSiteAdapter): description = soup.find('blockquote', {'class' : 'summary'}) if description is not None: self.storyDescription = unicode(description.p.string) - logging.debug('self.storyDescription=%s' % self.storyDescription.replace('\n',' ').replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace('\n',' ').replace('\r','')) meta = soup.find('p', {'class' : 'meta'}) if meta is not None: diff --git a/fanficdownloader/fpcom.py b/fanficdownloader/fpcom.py index bf020c0b..ab2a9bca 100644 --- a/fanficdownloader/fpcom.py +++ b/fanficdownloader/fpcom.py @@ -155,7 +155,7 @@ class FPCom(FanfictionSiteAdapter): for meta in metas: if 'content' in meta._getAttrMap(): self.storyDescription = unicode(meta['content']) - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) title=meta.find('title') logging.debug('title=%s' % title.string) diff --git a/fanficdownloader/hpfiction.py b/fanficdownloader/hpfiction.py index ba265e12..0fb03df5 100644 --- a/fanficdownloader/hpfiction.py +++ b/fanficdownloader/hpfiction.py @@ -208,7 +208,7 @@ class HPFiction(FanfictionSiteAdapter): else: ii = ii + 1 self.storyDescription = sss - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) urls = [] diff --git a/fanficdownloader/mediaminer.py b/fanficdownloader/mediaminer.py index 81911042..03e64619 100644 --- a/fanficdownloader/mediaminer.py +++ b/fanficdownloader/mediaminer.py @@ -216,7 +216,7 @@ class MediaMiner(FanfictionSiteAdapter): pass elif ssbt == 'Summary:': self.storyDescription = sst.strip() - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) elif ssbt == 'Latest Revision:' or ssbt == 'Uploaded On:': #logging.debug('sst=%s' % sst) ssts = sst.split(' ') diff --git a/fanficdownloader/twilighted.py b/fanficdownloader/twilighted.py index 6517ab14..d1151e6c 100644 --- a/fanficdownloader/twilighted.py +++ b/fanficdownloader/twilighted.py @@ -176,7 +176,7 @@ class Twilighted(FanfictionSiteAdapter): ps = s1.findAll('p') if len(ps) > 0: self.storyDescription = ps[0] - logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) + #logging.debug('self.storyDescription=%s' % self.storyDescription.replace("\n"," ").replace('\r','')) else: divs = meta.findAll('div') #logging.debug('Divs: %s' % divs) From f75abaad727bb499a8e1174b1246d64f811a6fcc Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 2 May 2011 13:54:48 -0500 Subject: [PATCH 132/482] 3-0-2 Add convert link feature. Remove title page table(for convert). Make favicon.ico work. 
--- app.yaml | 7 ++++++- fanficdownloader/output.py | 26 +++++++++++++------------- index.html | 20 ++++++++++++++++++-- main.py | 21 ++++++++++++++------- recent.html | 7 +++++-- status.html | 7 +++++-- 6 files changed, 61 insertions(+), 27 deletions(-) diff --git a/app.yaml b/app.yaml index 371bf6ce..bee0c4e6 100644 --- a/app.yaml +++ b/app.yaml @@ -1,5 +1,6 @@ +# fanfictionloader application: fanfictionloader -version: 3-0-1 +version: 3-0-2 runtime: python api_version: 1 @@ -25,6 +26,10 @@ handlers: - url: /static static_dir: static +- url: /favicon\.ico + static_files: static/favicon.ico + upload: static/favicon\.ico + - url: /.* script: main.py diff --git a/fanficdownloader/output.py b/fanficdownloader/output.py index 087fff30..b9404d56 100644 --- a/fanficdownloader/output.py +++ b/fanficdownloader/output.py @@ -476,22 +476,22 @@ class EPubFanficWriter(FanficWriter): ### writing content -- title page titleFilePath = "OEBPS/title_page.xhtml" - self._writeFile(titleFilePath, TABLE_TITLE_HEADER % (self.authorName, self.storyTitle, self.adapter.getStoryURL(), self.storyTitle, self.adapter.getAuthorURL(), self.authorName)) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Category:', self.adapter.getCategory())) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Genre:', self.adapter.getGenre())) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Status:', self.adapter.getStoryStatus())) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Published:', published)) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Updated:', updated)) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Packaged:', createda)) + self._writeFile(titleFilePath, TITLE_HEADER % (self.authorName, self.storyTitle, self.adapter.getStoryURL(), self.storyTitle, self.adapter.getAuthorURL(), self.authorName)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Category:', self.adapter.getCategory())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Genre:', self.adapter.getGenre())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Status:', self.adapter.getStoryStatus())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Published:', published)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Updated:', updated)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Packaged:', createda)) tmpstr = self.adapter.getStoryRating() + " / " + self.adapter.getStoryUserRating() - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Rating Age/User:', tmpstr)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Rating Age/User:', tmpstr)) tmpstr = unicode(self.adapter.getNumChapters()) + " / " + commaGroups(unicode(self.adapter.getNumWords())) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Chapters/Words:', tmpstr)) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) - self._writeFile(titleFilePath, TABLE_TITLE_ENTRY % ('Author ID:', self.adapter.getAuthorId())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Chapters/Words:', tmpstr)) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Publisher:', self.adapter.getHost())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Story ID:', self.adapter.getStoryId())) + self._writeFile(titleFilePath, TITLE_ENTRY % ('Author ID:', self.adapter.getAuthorId())) - self._writeFile(titleFilePath, TABLE_TITLE_FOOTER % description ) + self._writeFile(titleFilePath, TITLE_FOOTER % description ) ### writing content -- opf file opfFilePath = 
"OEBPS/content.opf" diff --git a/index.html b/index.html index d599c869..499d3be8 100644 --- a/index.html +++ b/index.html @@ -1,7 +1,7 @@ - + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza @@ -35,9 +35,25 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier.

    -

    For Amazon Kindle use Mobi output, for Sony Reader, Nook and iPad use ePub

    +

For Amazon Kindle use Mobi output (see notice below), for Sony Reader, Nook and iPad use ePub

    Or see your personal list of previously downloaded fanfics.

    +

    Experimental New Feature

    +

    + If you select EPub format, when it's done you will also be given a 'Convert' link. +

    +

+ That link will take you to convertfiles.com where you can + directly convert your new story to FictionBook (fb2), Mobipocket (mobi), MS Reader (lit) or Adobe Portable + Document Format (pdf). + There's also a 'Convert' link for EPubs on your recent downloads + page. We'd really like to hear from users about this in our Google Group. +

    +

    + We'd especially like Kindle and other Mobi users to try it. The convertfiles.com Mobi file + appears to be more correct than our Mobi output. +

    +
    {{ error_message }}
    diff --git a/main.py b/main.py index e4d603f0..eaa41d9f 100644 --- a/main.py +++ b/main.py @@ -81,8 +81,6 @@ class MainHandler(webapp.RequestHandler): self.response.out.write(template.render(path, template_values)) else: -# self.redirect(users.create_login_url(self.request.uri)) -# self.redirect('/login') logging.debug(users.create_login_url('/')) url = users.create_login_url(self.request.uri) template_values = {'login_url' : url, 'authorized': False} @@ -91,8 +89,8 @@ class MainHandler(webapp.RequestHandler): class FileServer(webapp.RequestHandler): + def get(self): -# user = users.get_current_user() fileId = self.request.get('id') if fileId == None or len(fileId) < 3: @@ -144,7 +142,6 @@ class FileServer(webapp.RequestHandler): class FileStatusServer(webapp.RequestHandler): def get(self): - logging.info("Status id: %s" % id) user = users.get_current_user() if not user: self.redirect(users.create_login_url(self.request.uri)) @@ -159,8 +156,14 @@ class FileStatusServer(webapp.RequestHandler): fic = db.get(key) logging.info("Status url: %s" % fic.url) - - template_values = dict(fic = fic, nickname = user.nickname()) + if fic.completed and fic.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+fileId+"&fake=file."+fic.format) + else: + escaped_url=False + template_values = dict(fic = fic, + nickname = user.nickname(), + escaped_url = escaped_url + ) path = os.path.join(os.path.dirname(__file__), 'status.html') self.response.out.write(template.render(path, template_values)) @@ -174,6 +177,10 @@ class RecentFilesServer(webapp.RequestHandler): q = DownloadMeta.all() q.filter('user =', user).order('-date') fics = q.fetch(100) + + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) template_values = dict(fics = fics, nickname = user.nickname()) path = os.path.join(os.path.dirname(__file__), 'recent.html') @@ -412,7 +419,7 @@ def main(): application = webapp.WSGIApplication([('/', MainHandler), ('/fdowntask', FanfictionDownloaderTask), ('/fdown', FanfictionDownloader), - ('/file', FileServer), + (r'/file.*', FileServer), ('/status', FileStatusServer), ('/recent', RecentFilesServer), ('/r2d2', RecentAllFilesServer), diff --git a/recent.html b/recent.html index b0ff3009..d03a621f 100644 --- a/recent.html +++ b/recent.html @@ -1,7 +1,7 @@ - + Fanfiction Downloader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) @@ -36,8 +36,11 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> {% for fic in fics %}

    {% if fic.completed %} - {{ fic.title }} + Download {{ fic.title }} by {{ fic.author }} ({{ fic.format }})
    + {% if fic.escaped_url %} + Convert {{ fic.title }} to other formats
    + {% endif %} {% endif %} {% if fic.failure %}

    {{ fic.failure }}
    diff --git a/status.html b/status.html index a5dfbf30..cb70cb0e 100644 --- a/status.html +++ b/status.html @@ -1,7 +1,7 @@ - + {% if fic.completed %} Finished {% else %} {% if fic.failure %} Failed {% else %} Working... {% endif %} {% endif %} - Fanfiction Downloader @@ -35,8 +35,11 @@
    {% if fic.completed %}

    Your fic has finished processing and you can download it now:

    -

    {{ fic.title }} +

    Download {{ fic.title }} by {{ fic.author }} ({{ fic.format }})

    + {% if escaped_url %} +

    Convert {{ fic.title }} to other formats

    + {% endif %} {% else %} {% if fic.failure %} Your fic failed to process. Please check the URL and the error message below.
    From 150316f46077e943f87c30d2bb8ef5b434eb688a Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 3 May 2011 11:27:58 -0500 Subject: [PATCH 133/482] Commit first version of reorg/rewrite. Currently CLI only. --- app.yaml | 37 + cron.yaml | 4 + css/index.css | 71 + delete_fic.py | 59 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapter.py | 231 + fanficdownloader/adapters/__init__.py | 81 + .../adapters/adapter_fanfictionnet.py | 183 + fanficdownloader/adapters/adapter_test1.py | 89 + .../adapters/adapter_twilightednet.py | 200 + .../adapters/adapter_whoficcom.py | 183 + fanficdownloader/adapters/base_adapter.py | 102 + fanficdownloader/adastrafanfic.py | 225 + fanficdownloader/books/place holder.txt | 0 fanficdownloader/configurable.py | 49 + fanficdownloader/constants.py | 552 +++ fanficdownloader/defaults.ini | 111 + fanficdownloader/downloader.py | 220 + fanficdownloader/epubmerge.py | 293 ++ fanficdownloader/ffnet.py | 368 ++ fanficdownloader/fictionalley.py | 301 ++ fanficdownloader/ficwad.py | 257 + fanficdownloader/fpcom.py | 301 ++ fanficdownloader/hpfiction.py | 280 ++ fanficdownloader/html.py | 126 + fanficdownloader/html2text.py | 452 ++ fanficdownloader/html_constants.py | 19 + fanficdownloader/htmlcleanup.py | 448 ++ fanficdownloader/mediaminer.py | 366 ++ fanficdownloader/mobi.py | 384 ++ fanficdownloader/newdownload.py | 48 + fanficdownloader/output.py | 643 +++ fanficdownloader/potionsNsnitches.py | 367 ++ fanficdownloader/readme.txt | 10 + fanficdownloader/story.py | 64 + fanficdownloader/twilighted.py | 316 ++ fanficdownloader/twipassword.py | 5 + fanficdownloader/twiwrite.py | 280 ++ fanficdownloader/whofic.py | 225 + fanficdownloader/writers/__init__.py | 16 + fanficdownloader/writers/base_writer.py | 168 + fanficdownloader/writers/writer_epub.py | 404 ++ fanficdownloader/writers/writer_html.py | 84 + fanficdownloader/writers/writer_txt.py | 142 + fanficdownloader/zipdir.py | 177 + ffstorage.py | 19 + index-ajax.html | 109 + index.html | 219 + index.yaml | 33 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ main.py | 433 ++ queue.yaml | 7 + recent.html | 80 + simplejson/__init__.py | 318 ++ simplejson/__init__.pyc | Bin 0 -> 12071 bytes simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/decoder.pyc | Bin 0 -> 11292 bytes simplejson/encoder.py | 440 ++ simplejson/encoder.pyc | Bin 0 -> 13938 bytes simplejson/scanner.py | 65 + simplejson/scanner.pyc | Bin 0 -> 2340 bytes simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes status.html | 79 + utils/remover.py | 52 + 85 files changed, 20691 insertions(+) create mode 100644 app.yaml create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 delete_fic.py create mode 100644 
fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapter.py create mode 100644 fanficdownloader/adapters/__init__.py create mode 100644 fanficdownloader/adapters/adapter_fanfictionnet.py create mode 100644 fanficdownloader/adapters/adapter_test1.py create mode 100644 fanficdownloader/adapters/adapter_twilightednet.py create mode 100644 fanficdownloader/adapters/adapter_whoficcom.py create mode 100644 fanficdownloader/adapters/base_adapter.py create mode 100644 fanficdownloader/adastrafanfic.py create mode 100644 fanficdownloader/books/place holder.txt create mode 100644 fanficdownloader/configurable.py create mode 100644 fanficdownloader/constants.py create mode 100644 fanficdownloader/defaults.ini create mode 100644 fanficdownloader/downloader.py create mode 100644 fanficdownloader/epubmerge.py create mode 100644 fanficdownloader/ffnet.py create mode 100644 fanficdownloader/fictionalley.py create mode 100644 fanficdownloader/ficwad.py create mode 100644 fanficdownloader/fpcom.py create mode 100644 fanficdownloader/hpfiction.py create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/html2text.py create mode 100644 fanficdownloader/html_constants.py create mode 100644 fanficdownloader/htmlcleanup.py create mode 100644 fanficdownloader/mediaminer.py create mode 100644 fanficdownloader/mobi.py create mode 100644 fanficdownloader/newdownload.py create mode 100644 fanficdownloader/output.py create mode 100644 fanficdownloader/potionsNsnitches.py create mode 100644 fanficdownloader/readme.txt create mode 100644 fanficdownloader/story.py create mode 100644 fanficdownloader/twilighted.py create mode 100644 fanficdownloader/twipassword.py create mode 100644 fanficdownloader/twiwrite.py create mode 100644 fanficdownloader/whofic.py create mode 100644 fanficdownloader/writers/__init__.py create mode 100644 fanficdownloader/writers/base_writer.py create mode 100644 fanficdownloader/writers/writer_epub.py create mode 100644 fanficdownloader/writers/writer_html.py create mode 100644 fanficdownloader/writers/writer_txt.py create mode 100644 fanficdownloader/zipdir.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 main.py create mode 100644 queue.yaml create mode 100644 recent.html create mode 100644 simplejson/__init__.py create mode 100644 simplejson/__init__.pyc create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/decoder.pyc create mode 100644 simplejson/encoder.py create mode 100644 simplejson/encoder.pyc create mode 100644 simplejson/scanner.py create mode 100644 simplejson/scanner.pyc create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 
simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 status.html create mode 100644 utils/remover.py diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..bee0c4e6 --- /dev/null +++ b/app.yaml @@ -0,0 +1,37 @@ +# fanfictionloader +application: fanfictionloader +version: 3-0-2 +runtime: python +api_version: 1 + +handlers: +- url: /r3m0v3r + script: utils/remover.py + login: admin + +- url: /r3m0v3r + script: main.py + login: admin + +- url: /fdownloadtask + script: main.py + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /favicon\.ico + static_files: static/favicon.ico + upload: static/favicon\.ico + +- url: /.* + script: main.py + +builtins: +- datastore_admin: on diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..325ad870 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 2 hours diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..36c22034 --- /dev/null +++ b/css/index.css @@ -0,0 +1,71 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 43%; + margin-left: 23%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ + margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { +// display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + font-size: small; + color: #f00; +} diff --git a/delete_fic.py b/delete_fic.py new file mode 100644 index 00000000..73722724 --- /dev/null +++ b/delete_fic.py @@ -0,0 +1,59 @@ +import os +import cgi +import sys +import logging +import traceback +import StringIO + +from google.appengine.api import users +from google.appengine.ext import webapp +from google.appengine.ext.webapp import util + +from fanficdownloader.downaloder import * +from fanficdownloader.ffnet import * +from fanficdownloader.output import * + +from google.appengine.ext import db + +from fanficdownloader.zipdir import * + +from ffstorage import * + +def create_mac(user, fic_id, fic_url): + return str(abs(hash(user)+hash(fic_id)))+str(abs(hash(fic_url))) + +def check_mac(user, fic_id, fic_url, mac): + return (create_mac(user, fic_id, fic_url) == mac) + +def create_mac_for_fic(user, fic_id): + key = db.Key(fic_id) + fanfic = db.get(key) + if fanfic.user != user: + return None + else: + return create_mac(user, key, fanfic.url) + +class DeleteFicHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect('/login') + + fic_id = self.request.get('fic_id') + fic_mac = self.request.get('key_id') + + actual_mac = create_mac_for_fic(user, fic_id) + if actual_mac != fic_mac: + self.response.out.write("Ooops") + else: + key = db.Key(fic_id) + fanfic = 
db.get(key) + fanfic.delete() + self.redirect('/recent') + + + fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1", user) + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + \ No newline at end of file diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..4b17b853 --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. 
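+        # The tree is threaded two ways: parent/contents give its shape,
+        # while previous/next form a flat linked list over every element
+        # in document order. Extracting a subtree therefore also splices
+        # that list: the element parsed before this one is joined to the
+        # element parsed after this subtree's last descendant.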
+ lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+ break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
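+        # findParents(..., 1) caps the search at one result, so l is
+        # either empty or holds the single closest matching ancestor.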
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
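+        # The pattern below matches decimal (&#38;), hexadecimal (&#x26;)
+        # and named (&amp;) references; _convertEntities then decides,
+        # from the parser's convert* flags, which become real characters.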
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
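+                    # e.g. a value of 'Tom & Jerry <3' is rendered as
+                    # 'Tom &amp; Jerry &lt;3'; an '&' that already starts
+                    # an entity is left untouched.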
+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+ elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. 
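+    # Maps the code points of plain ASCII whitespace to None, the form
+    # unicode.translate() expects; endData() uses it to check whether a
+    # text node contains anything besides ordinary whitespace.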
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
    (No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. 
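+        # endData() flushes any buffered text into a NavigableString,
+        # then the tag stack is popped until only the synthetic
+        # [document] root remains.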
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>
    ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." 
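+        # With convertEntities set, a reference like '&#65;' is turned
+        # into u'A'; otherwise the raw '&#65;' text is kept in the tree.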
+ if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

+      <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurance
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+       but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table><tr>
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+ # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + + + + + + + + +
    +

    + FanFiction Downloader +

    + + +
    +
+ Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste the URL of the first chapter into the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
+ If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it; otherwise just leave it empty. +
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
+ A few things to know, which will make your life substantially easier:
+
1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
3. Paste the URL of the first chapter of the fanfic, not the index page.
4. Fics with a single chapter are not supported (you can just copy and paste the text).
5. Stories which are too long may not be downloaded correctly and the application will report a time-out error — this is a limitation currently imposed by Google App Engine on long-running activities.
6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
7. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in future they will be deleted after 5 days to save space.
8. If the Downloader simply opens a file download window rather than saving the fanfic and giving you a link, it means the story is too large to save in the database and you need to download it straight away.
9. If you think that something that should work in fact doesn't, drop me a mail to sigizmund@gmail.com.
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..499d3be8 --- /dev/null +++ b/index.html @@ -0,0 +1,219 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    +

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

    +

For Amazon Kindle use Mobi output (see notice below); for Sony Reader, Nook and iPad use ePub

    +

    Or see your personal list of previously downloaded fanfics.

    +
    +

    Experimental New Feature

    +

+ If you select the EPub format, you will also be given a 'Convert' link when the download is done. +

    +

+ That link will take you to convertfiles.com where you can directly convert your new story to FictionBook (fb2), Mobipocket (mobi), MS Reader (lit) or Adobe Portable Document Format (pdf). There's also a 'Convert' link for EPubs on your recent downloads page. We'd really like to hear from users about this in our Google Group. +

    +

    + We'd especially like Kindle and other Mobi users to try it. The convertfiles.com Mobi file + appears to be more correct than our Mobi output. +

    + +
    + {{ error_message }} +
    + +
    + +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text + Mobi (Kindle) +
    +
    + +
    +

    Login and Password

    +
+ If the story requires a login and password to download, you may need to provide your credentials to download it; otherwise just leave it empty. Currently this is only needed by twilighted.net and twiwrite.net. +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    + +
    + +
    + + {% else %} +
    +
    +

+ This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them. +

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    +
    +
    fictionalley.org +
    Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    fanfiction.net +
    Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/2345466/3/. +
    fictionpress.com +
    Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    twilighted.net +
    Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    twiwrite.net +
    Use the URL of the start of the story, such as +
    http://twiwrite.net/viewstory.php?sid=427. +
    ficwad.com +
    Use the URL of any story chapter, such as +
    http://www.ficwad.com/story/75246. +
    harrypotterfanfiction.com +
    Use the URL of the story's chapter list, such as +
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
    potionsandsnitches.net +
    Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    mediaminer.org +
    Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
    adastrafanfic.com +
    Use the URL of the story's chapter list, such as +
    http://www.adastrafanfic.com/viewstory.php?sid=854. +
    whofic.com +
    Use the URL of the story's chapter list, such as +
    http://www.whofic.com/viewstory.php?sid=16334. +
    + + + A few additional things to know, which will make your life substantially easier: +
      +
    1. + First thing to know: I do not use your login and password. In fact, all I know about it is your ID – password + is being verified by Google and is absolutely, totally unknown to anyone but you. +
    2. +
    3. + A small post written by me + on how to read fiction in Stanza or any other ebook reader. +
    4. +
    5. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader. +
    6. +
    7. + Downloaded stories are deleted after some time (which should give you enough time to download them and keeps + Google happy about the app staying under its storage limit; see the cleanup sketch below). +
    8. +
    9. + If you see funny characters in a downloaded Plain Text file, make sure you open it with the UTF-8 text encoding and + not something else. +
    10. +
    11. + If you think that something that should work in fact doesn't, drop me an email + at sigizmund@gmail.com, or, even better, write to + our Google Group. I also encourage you to join it so + you will find out about the latest updates and fixes as soon as possible. +
    12. +
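The first point works because App Engine delegates sign-in to Google's own account service, so the application only ever sees the authenticated account. A minimal sketch of that check, using the real google.appengine.api.users API (the helper itself is illustrative):

    from google.appengine.api import users

    def current_user_id():
        # Google performs the authentication; the app receives the
        # resulting account object and never touches the password.
        user = users.get_current_user()
        if user is None:
            # Not signed in; callers would redirect the browser to
            # users.create_login_url('/') instead.
            return None
        return user.user_id()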
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
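The deletion policy in point 7 lines up with the DownloadedFanfic indexes declared in index.yaml (properties cleared and date). A periodic cleanup job could look roughly like this sketch; the model name comes from index.yaml, but the cutoff and batch size are assumptions:

    import datetime
    from google.appengine.ext import db

    def clear_old_fanfics(days=7):
        cutoff = datetime.datetime.now() - datetime.timedelta(days=days)
        # Served by the (cleared, date) index declared in index.yaml.
        old = db.GqlQuery('SELECT * FROM DownloadedFanfic '
                          'WHERE cleared = :1 AND date < :2', False, cutoff)
        # Delete in small batches to stay inside request-time limits.
        db.delete(old.fetch(100))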
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    + +
    + + + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..16bcaefe --- /dev/null +++ b/index.yaml @@ -0,0 +1,33 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+ all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "
    ", "
    " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + + {{yourfile}} + + +

    +
    + Hi, {{ nickname }}! These are the fanfics you've recently requested. +
    +
    + +
    + {% for fic in fics %} +

    + {% if fic.completed %} + Download {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})
    + {% if fic.escaped_url %} + Convert {{ fic.title }} to other formats
    + {% endif %} + {% endif %} + {% if fic.failure %} +

    {{ fic.failure }}
    + {% endif %} + {% if not fic.completed and not fic.failure %} + Request Processing...
    + {% endif %} + {{ fic.url }} + +

    + {% endfor %} +
    + + + + +
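The checkResults() loop in js/fdownloader.js polls /progress once a second with the key returned by /submitDownload and expects JSON whose result field is 'Nope' while the job is still running, 'OK' on success (together with name and author), or an error string otherwise. A handler matching that contract could look like the sketch below; the class and the entity fields (completed, failure, title, author, mirroring the template above) are assumptions:

    import simplejson
    from google.appengine.ext import db, webapp

    class ProgressHandler(webapp.RequestHandler):
        def get(self):
            # 'key' is assumed to be the datastore key string that
            # /submitDownload handed back to the browser.
            meta = db.get(db.Key(self.request.get('key')))
            if meta is None:
                reply = {'result': 'Nope'}            # nothing yet
            elif meta.failure:
                reply = {'result': meta.failure}      # shown as an error
            elif not meta.completed:
                reply = {'result': 'Nope'}            # still working
            else:
                reply = {'result': 'OK', 'name': meta.title,
                         'author': meta.author}
            self.response.out.write(simplejson.dumps(reply))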
    + + + + diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
+ >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01003d4f81d37513d0f8a2a5fb857b8448ae2bd GIT binary patch literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? 
zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ literal 0 HcmV?d00001 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *skipkeys; + int fast_encode; + int allow_nan; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {NULL} +}; + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +void init_speedups(void); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); +static int +encoder_clear(PyObject *self); +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *const); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()); + return 1; + return 0; +} 
+ +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + 
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if 
(pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 
5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. 
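+    (as an illustrative example, not one taken from the test suite: for
+    u'"abc"' with end == 1 this returns u'abc' and sets *next_end_ptr
+    to 5, one past the closing quote)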
+ if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 
10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. 
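+    (illustrative example: given '{"a": 1}' with idx just past the '{',
+    the result is {'a': 1} and *next_idx_ptr is set to 8)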
+ idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
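+    (illustrative example: given u'{"a": 1}' with idx just past the '{',
+    the result is {u'a': 1} and *next_idx_ptr is set to 8)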
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. 
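+    (illustrative example: given '[1, 2]' with idx just past the '[',
+    the result is [1, 2] and *next_idx_ptr is set to 6)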
+ + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON constant from PyString pystr. + constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. 
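+    (illustrative example: with the default parse_constant, "NaN" here
+    yields float('nan') and *next_idx_ptr lands just past the three
+    constant characters)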
+ + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */
+    if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+        is_float = 1;
+    }
+    else {
+        idx = e_start;
+    }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyString_FromStringAndSize(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
+        }
+    }
+    else {
+        /* parse as an int using a fast path if available, otherwise call user defined method */
+        if (s->parse_int != (PyObject *)&PyInt_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+        }
+        else {
+            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+        }
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON number from PyUnicode pystr.
+    idx is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+    the number.
+
+    Returns a new PyObject representation of that number:
+    PyInt, PyLong, or PyFloat.
+    May return other types if parse_int or parse_float are set
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        idx++;
+        if (idx > end_idx) {
+            PyErr_SetNone(PyExc_StopIteration);
+            return NULL;
+        }
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits
+       (use <= here so a digit in the last position of the document is also
+       consumed, matching _match_number_str above) */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. 
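+    (illustrative examples: u'true' at idx yields Py_True with
+    *next_idx_ptr at idx + 4; u'"hi"' yields u'hi', also ending at
+    idx + 4)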
+ + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 
0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON 
representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + 
empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto 
bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, 
NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
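+
+    An illustrative call (assumed, not taken from the test suite):
+
+        py_scanstring('"abc"', 1) == (u'abc', 5)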
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not isinstance(content, unicode):
+                content = unicode(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at" % (terminator,)
+                #msg = "Invalid control character {0!r} at".format(terminator)
+                raise ValueError(errmsg(msg, s, end))
+            else:
+                _append(terminator)
+            continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\escape: " + repr(esc)
+                raise ValueError(errmsg(msg, s, end))
+            end += 1
+        else:
+            # Unicode escape sequence
+            esc = s[end + 1:end + 5]
+            next_end = end + 5
+            if len(esc) != 4:
+                msg = "Invalid \\uXXXX escape"
+                raise ValueError(errmsg(msg, s, end))
+            uni = int(esc, 16)
+            # Check for surrogate pair on UCS-4 systems
+            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                if not s[end + 5:end + 7] == '\\u':
+                    raise ValueError(errmsg(msg, s, end))
+                esc2 = s[end + 7:end + 11]
+                if len(esc2) != 4:
+                    raise ValueError(errmsg(msg, s, end))
+                uni2 = int(esc2, 16)
+                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                next_end += 6
+            char = unichr(uni)
+            end = next_end
+        # Append the unescaped character
+        _append(char)
+    return u''.join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    pairs = {}
+    # Use a slice to prevent IndexError from being raised; the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
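+        # (":" and ": " are recognized without a regex call; longer
+        # whitespace runs fall through to the _w match below)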
+ if s[end:end + 1] != ':': + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + + try: + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 + + if nextchar == '}': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end + +def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + values = [] + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + # Look-ahead for trivial empty array + if nextchar == ']': + return values, end + 1 + _append = values.append + while True: + try: + value, end = scan_once(s, end) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + _append(value) + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + elif nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end + 1).end() + except IndexError: + pass + + return values, end + +class JSONDecoder(object): + """Simple JSON decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + + """ + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). 
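+
+        As an illustrative sketch (the hook below is made up for this
+        docstring), an ``object_hook`` such as::
+
+            def as_complex(d):
+                if '__complex__' in d:
+                    return complex(d['real'], d['imag'])
+                return d
+
+        would turn ``{"__complex__": true, "real": 1, "imag": 2}`` into
+        ``(1+2j)``.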
+
+        ``parse_float``, if specified, will be called with the string
+        of every JSON float to be decoded. By default this is equivalent to
+        float(num_str). This can be used to parse JSON floats with another
+        datatype or parser (e.g. decimal.Decimal).
+
+        ``parse_int``, if specified, will be called with the string
+        of every JSON int to be decoded. By default this is equivalent to
+        int(num_str). This can be used to parse JSON integers with another
+        datatype or parser (e.g. float).
+
+        ``parse_constant``, if specified, will be called with one of the
+        following strings: -Infinity, Infinity, NaN.
+        This can be used to raise an exception if invalid JSON numbers
+        are encountered.
+
+        """
+        self.encoding = encoding
+        self.object_hook = object_hook
+        self.parse_float = parse_float or float
+        self.parse_int = parse_int or int
+        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+        self.strict = strict
+        self.parse_object = JSONObject
+        self.parse_array = JSONArray
+        self.parse_string = scanstring
+        self.scan_once = make_scanner(self)
+
+    def decode(self, s, _w=WHITESPACE.match):
+        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+        instance containing a JSON document)
+
+        """
+        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+        end = _w(s, end).end()
+        if end != len(s):
+            raise ValueError(errmsg("Extra data", s, end, len(s)))
+        return obj
+
+    def raw_decode(self, s, idx=0):
+        """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
+        with a JSON document) and return a 2-tuple of the Python
+        representation and the index in ``s`` where the document ended.
+
+        This can be used to decode a JSON document from a string that may
+        have extraneous data at the end.
+
+        """
+        try:
+            obj, end = self.scan_once(s, idx)
+        except StopIteration:
+            raise ValueError("No JSON object could be decoded")
+        return obj, end
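A minimal sketch of how the hooks wired up above behave (assuming the
simplejson package is importable; ``upper_keys`` is an illustrative helper,
not part of the library):

    import decimal
    from simplejson.decoder import JSONDecoder

    # parse_float: decode JSON reals as exact Decimals instead of floats
    d = JSONDecoder(parse_float=decimal.Decimal)
    print d.decode('1.1')                    # Decimal('1.1')

    # object_hook: post-process every decoded JSON object (a dict)
    def upper_keys(obj):
        return dict((k.upper(), v) for k, v in obj.items())
    print JSONDecoder(object_hook=upper_keys).decode('{"a": 1}')   # {u'A': 1}

    # raw_decode: stop after the first complete document, report where
    obj, end = JSONDecoder().raw_decode('{"a": 1} trailing data')  # end == 8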
diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc
GIT binary patch
literal 11292
[base85-encoded compiled bytecode omitted]
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
[the head of this file -- module docstring, imports, the ESCAPE tables and the
opening of py_encode_basestring_ascii -- was lost in extraction; the text
resumes inside the surrogate-pair branch of its replace() helper]
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                s2 = 0xdc00 | (n & 0x3ff)
+                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
+
+class JSONEncoder(object):
+    """Extensible JSON encoder for Python data structures.
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible; otherwise it should call the superclass implementation (to
+    raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None. If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped. If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such. This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level. An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be a (item_separator, key_separator)
+        tuple. The default is (', ', ': '). To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized. It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+ + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. 
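+            # ((o != o) below is the portable NaN check: NaN is the only
+            # float value that compares unequal to itself.)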
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
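+            # (float, bool, int/long and None keys are coerced to the strings
+            # "3.0", "true"/"false", "2" and "null" by the branches below.)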
+            elif isinstance(key, float):
+                key = _floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif _skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield _encoder(key)
+            yield _key_separator
+            if isinstance(value, basestring):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, (int, long)):
+                yield str(value)
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            else:
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode(o, _current_indent_level):
+        if isinstance(o, basestring):
+            yield _encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, (int, long)):
+            yield str(o)
+        elif isinstance(o, float):
+            yield _floatstr(o)
+        elif isinstance(o, (list, tuple)):
+            for chunk in _iterencode_list(o, _current_indent_level):
+                yield chunk
+        elif isinstance(o, dict):
+            for chunk in _iterencode_dict(o, _current_indent_level):
+                yield chunk
+        else:
+            if markers is not None:
+                markerid = id(o)
+                if markerid in markers:
+                    raise ValueError("Circular reference detected")
+                markers[markerid] = o
+            o = _default(o)
+            for chunk in _iterencode(o, _current_indent_level):
+                yield chunk
+            if markers is not None:
+                del markers[markerid]
+
+    return _iterencode
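A minimal sketch of the extension point described in the class docstring
(``SetEncoder`` is an illustrative subclass, not part of the library):

    from simplejson.encoder import JSONEncoder

    class SetEncoder(JSONEncoder):
        # default() is consulted only for types the encoder can't serialize
        def default(self, o):
            if isinstance(o, set):
                return sorted(o)
            return JSONEncoder.default(self, o)

    print SetEncoder(sort_keys=True).encode({'ids': set([3, 1, 2])})
    # {"ids": [1, 2, 3]}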
diff --git a/simplejson/encoder.pyc b/simplejson/encoder.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e59d372a7ab88749c287a7a2a77dfad41ebd616b
GIT binary patch
literal 13938
[base85-encoded compiled bytecode omitted]
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
new file mode 100644
index 00000000..adbc6ec9
--- /dev/null
+++ b/simplejson/scanner.py
@@ -0,0 +1,65 @@
+"""JSON token scanner
+"""
+import re
+try:
+    from simplejson._speedups import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None
+
+__all__ = ['make_scanner']
+
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
+
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
+
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
+
+    return _scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
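The scanner dispatches purely on the first character of the token; a small
sketch of driving the pure-Python scanner directly (assuming the default
decoder as its context):

    from simplejson.decoder import JSONDecoder
    from simplejson.scanner import py_make_scanner

    scan = py_make_scanner(JSONDecoder())
    print scan('null', 0)        # (None, 4)
    print scan('12.5e2', 0)      # (1250.0, 6)
    print scan('"hi" junk', 0)   # (u'hi', 4) -- the caller handles the rest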
diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30d94445f0a0c941ee46b6c4fa3bd255e662f6ef
GIT binary patch
literal 2340
[base85-encoded compiled bytecode omitted]
diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py
new file mode 100644
index 00000000..17c97963
--- /dev/null
+++ b/simplejson/tests/__init__.py
@@ -0,0 +1,23 @@
+import unittest
+import doctest
+
+def additional_tests():
+    import simplejson
+    import simplejson.encoder
+    import simplejson.decoder
+    suite = unittest.TestSuite()
+    for mod in (simplejson, simplejson.encoder, simplejson.decoder):
+        suite.addTest(doctest.DocTestSuite(mod))
+    suite.addTest(doctest.DocFileSuite('../../index.rst'))
+    return suite
+
+def main():
+    suite = additional_tests()
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
+
+if __name__ == '__main__':
+    import os
+    import sys
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+    main()
diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py
new file mode 100644
index 00000000..af6463d6
--- /dev/null
+++ b/simplejson/tests/test_check_circular.py
@@ -0,0 +1,30 @@
+from unittest import TestCase
+import simplejson as json
+
+def default_iterable(obj):
+    return list(obj)
+
+class TestCheckCircular(TestCase):
+    def test_circular_dict(self):
+        dct = {}
+        dct['a'] = dct
+        self.assertRaises(ValueError, json.dumps, dct)
+
+    def test_circular_list(self):
+        lst = []
+        lst.append(lst)
+        self.assertRaises(ValueError, json.dumps, lst)
+
+    def test_circular_composite(self):
+        dct2 = {}
+        dct2['a'] = []
+        dct2['a'].append(dct2)
+        self.assertRaises(ValueError, json.dumps, dct2)
+
+    def test_circular_default(self):
+        json.dumps([set()], default=default_iterable)
+        self.assertRaises(TypeError, json.dumps, [set()])
+
+    def test_circular_off_default(self):
+        json.dumps([set()], default=default_iterable, check_circular=False)
+        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
new file mode 100644
index 00000000..1cd701d4
--- /dev/null
+++ b/simplejson/tests/test_decode.py
@@ -0,0 +1,22 @@
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDecode(TestCase):
+    def test_decimal(self):
+        rval = json.loads('1.1', parse_float=decimal.Decimal)
+        self.assert_(isinstance(rval, decimal.Decimal))
+        self.assertEquals(rval, decimal.Decimal('1.1'))
+
+    def test_float(self):
+        rval = json.loads('1', parse_int=float)
+        self.assert_(isinstance(rval, float))
+        self.assertEquals(rval, 1.0)
+
+    def test_decoder_optimizations(self):
+        # Several optimizations were made that skip over calls to
+        # the whitespace regex, so this test is designed to try and
+        # exercise the uncommon cases. The array cases are already covered.
+        rval = json.loads('{ "key" : "value" , "k":"v" }')
+        self.assertEquals(rval, {"key":"value", "k":"v"})
diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py
new file mode 100644
index 00000000..139e42bf
--- /dev/null
+++ b/simplejson/tests/test_default.py
@@ -0,0 +1,9 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDefault(TestCase):
+    def test_default(self):
+        self.assertEquals(
+            json.dumps(type, default=repr),
+            json.dumps(repr(type)))
diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py
new file mode 100644
index 00000000..4de37cf4
--- /dev/null
+++ b/simplejson/tests/test_dump.py
@@ -0,0 +1,21 @@
+from unittest import TestCase
+from cStringIO import StringIO
+
+import simplejson as json
+
+class TestDump(TestCase):
+    def test_dump(self):
+        sio = StringIO()
+        json.dump({}, sio)
+        self.assertEquals(sio.getvalue(), '{}')
+
+    def test_dumps(self):
+        self.assertEquals(json.dumps({}), '{}')
+
+    def test_encode_truefalse(self):
+        self.assertEquals(json.dumps(
+                 {True: False, False: True}, sort_keys=True),
+                 '{"false": true, "true": false}')
+        self.assertEquals(json.dumps(
+                {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
+                '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py
new file mode 100644
index 00000000..7128495f
--- /dev/null
+++ b/simplejson/tests/test_encode_basestring_ascii.py
@@ -0,0 +1,38 @@
+from unittest import TestCase
+
+import simplejson.encoder
+
+CASES = [
+    (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
+    (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+    (u'controls', '"controls"'),
+    (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
+    (u' s p a c e d ', '" s p a c e d "'),
+    (u'\U0001d120', '"\\ud834\\udd20"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
+    (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+]
+
+class TestEncodeBaseStringAscii(TestCase):
+    def test_py_encode_basestring_ascii(self):
+        self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii)
+
+    def test_c_encode_basestring_ascii(self):
+        if not simplejson.encoder.c_encode_basestring_ascii:
+            return
+        self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii)
+
+    def _test_encode_basestring_ascii(self, encode_basestring_ascii):
+        fname = encode_basestring_ascii.__name__
+        for input_string, expect in CASES:
+            result = encode_basestring_ascii(input_string)
+            self.assertEquals(result, expect,
+                '%r != %r for %s(%r)' % (result, expect, fname, input_string))
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
new file mode 100644
index 00000000..002eea08
--- /dev/null
+++ b/simplejson/tests/test_fail.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+ +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase + +import simplejson as json +import textwrap + +class 
TestIndent(TestCase):
+    def test_indent(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ],
+          [
+            "whoops"
+          ],
+          [],
+          "d-shtaeou",
+          "d-nthiouh",
+          "i-vhbjkhnth",
+          {
+            "nifty": 87
+          },
+          {
+            "field": "yes",
+            "morefield": false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
new file mode 100644
index 00000000..c3d6302d
--- /dev/null
+++ b/simplejson/tests/test_pass1.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "": 23456789012E666,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[ ],
+        "object":{ },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#34;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+class TestPass1(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
+        try:
+            json.dumps(res, allow_nan=False)
+        except ValueError:
+            pass
+        else:
+            self.fail("23456789012E666 should be out of range")
diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py
new file mode 100644
index 00000000..de4ee00b
--- /dev/null
+++ b/simplejson/tests/test_pass2.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass2.json
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+class TestPass2(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py
new file mode 100644
index 00000000..f591aba9
--- /dev/null
+++ b/simplejson/tests/test_pass3.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass3.json
+JSON = r'''
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+ } +} +''' + +class TestPass3(TestCase): + def test_parse(self): + # test in/out equivalence and parsing + res = json.loads(JSON) + out = json.dumps(res) + self.assertEquals(res, json.loads(out)) diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py new file mode 100644 index 00000000..97422a66 --- /dev/null +++ b/simplejson/tests/test_recursion.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +import simplejson as json + +class JSONTestObject: + pass + + +class RecursiveJSONEncoder(json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return json.JSONEncoder.default(o) + + +class TestRecursion(TestCase): + def test_listrecursion(self): + x = [] + x.append(x) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + json.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + json.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + json.dumps(x) + + def test_defaultrecursion(self): + enc = RecursiveJSONEncoder() + self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 index 00000000..b08dec71 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,111 @@ +import sys +import decimal +from unittest import TestCase + +import simplejson as json +import simplejson.decoder + +class TestScanString(TestCase): + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + if not simplejson.decoder.c_scanstring: + return + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + self.assertEquals( + scanstring('"z\\ud834\\udd20x"', 1, None, True), + (u'z\U0001d120x', 16)) + + if sys.maxunicode == 65535: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEquals( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEquals( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEquals( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEquals( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEquals( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEquals( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEquals( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEquals( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra 
comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, 
'["\\u03b1\\u03a9"]')
+
+    def test_encoding5(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps(u, ensure_ascii=False)
+        self.assertEquals(j, u'"%s"' % (u,))
+
+    def test_encoding6(self):
+        u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+        j = json.dumps([u], ensure_ascii=False)
+        self.assertEquals(j, u'["%s"]' % (u,))
+
+    def test_big_unicode_encode(self):
+        u = u'\U0001d120'
+        self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
+        self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
+
+    def test_big_unicode_decode(self):
+        u = u'z\U0001d120x'
+        self.assertEquals(json.loads('"' + u + '"'), u)
+        self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
+
+    def test_unicode_decode(self):
+        for i in range(0, 0xd7ff):
+            u = unichr(i)
+            s = '"\\u%04x"' % (i,)
+            self.assertEquals(json.loads(s), u)
+
+    def test_default_encoding(self):
+        self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+            {'a': u'\xe9'})
+
+    def test_unicode_preservation(self):
+        self.assertEquals(type(json.loads(u'""')), unicode)
+        self.assertEquals(type(json.loads(u'"a"')), unicode)
+        self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
\ No newline at end of file
diff --git a/simplejson/tool.py b/simplejson/tool.py
new file mode 100644
index 00000000..90443317
--- /dev/null
+++ b/simplejson/tool.py
@@ -0,0 +1,37 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import simplejson
+
+def main():
+    if len(sys.argv) == 1:
+        infile = sys.stdin
+        outfile = sys.stdout
+    elif len(sys.argv) == 2:
+        infile = open(sys.argv[1], 'rb')
+        outfile = sys.stdout
+    elif len(sys.argv) == 3:
+        infile = open(sys.argv[1], 'rb')
+        outfile = open(sys.argv[2], 'wb')
+    else:
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+    try:
+        obj = simplejson.load(infile)
+    except ValueError, e:
+        raise SystemExit(e)
+    simplejson.dump(obj, outfile, sort_keys=True, indent=4)
+    outfile.write('\n')
+
+
+if __name__ == '__main__':
+    main()
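Besides the pipe form shown in the docstring, main() also accepts input and
output file arguments (hypothetical file names):

    $ python -m simplejson.tool mess.json pretty.json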
zgb_=kIvvv?GI&6n9GF3ujY-D@NP@D-pFf91Cy^u1oI&?!IJb}(xK)nDnXbl&f}s>; z;#;8si@QnEHk&J3gSNcYVjbRBWRIc( zvKzVb5N~!MFj(-4jL_9=*-X>+JQKFctTLloj)kA> z9G&XPDujY_f!g_$c1eN~62X^~Q5s<^MdWux&z|6ZtNq_x%Nv)pIs% z<@aqc7|CWG#B)lO27spFDxV{ zdYr!DyZ`O|uSUbIfsI>M$}+#ezuJX`W_;eY)V^^4y_63+2J*_W0U% zepJ^$z`V=Il-pRMH-O)ERX<>)`;978*j-52{u7^zX^6D`^EA)}O5Ge(0N%+LbVWZ3 z7{O(%Xc&Ih8OP#|MC*!1{kR+_8YV)k6N#2`rwV(Ym*O{JWIvY#0x~KwnK+*jILrpH zgsEGa3u@KiQ+VRWO+tOg#nRJmpN$nKR4-XirTUoGoH4@A*S|=2kmW7Na^0?<&XIR7 zOh~8kdrdpif%1ohd7+!_eVX}T-u z5(Z#qmf?p!n&=XXnBierie$(vu4-qu#{&x`ScFc29Zax*vO;;4wG^3Ly>pi2dQBUe zbn0P&ztrG*zPEG!kCk45uPjJ2c}{EBa&mFKYwyEbh)Y9wN5z&wFSW8F5PKN_dY3b9 z;`7C$R?X?Lu(C_}IaUiLI}_}`M=#WHpaA=gENGdp>e$%g{UTdV>#4Ev@4p1+=Eh|Y zro1mdCCr%)mL1?cSM=v5Lq9Hsj%wJ?9j;^ro=bC<80f*9MQpkSM;v zL4@!y?`T^T#xa7`-zO=YObN=sCp&rsK!P*uQ*+WYZ3q>q-g%jJaSkZK)98~=ryUBy z2k%0_Pmlq!a)q!m4GqDWdi$e#2Zx7ykyeufGs7^bb-%U6(h%L%cXMWRWg-Z<_=RaD z#O82mXO+I86OmbWX%C*m%p0DiNZOz0kJ1LV0C>PqrE_SsDg%>{>IpYgh~2Rqg2z5A zl6$I}Fc>=JX*Y)`iIH17<*mx=<%c2iX8DHQdzM=1%E1)FU8;FN(#6;jGWN^Mz6`Zg z&<)Aqf{`qahF9kBr`Ecq!X-Ie#WAW3V6YsIbQW-t+~{$LdQAb4@DSW*OFC&20rFy& z2lu6B?cSk=GAiD6KiHjZ_Y_mQlBneTpfipWpo2P4u;}B|Sh(zvl(O1bEWwtawu#vs zDoxa*pm3GTgXO|jcs36RP#Groh?);nb#dXA(O`Z1h~(A`%ekL<(?k`t@zKbxR*vAa ze{vyYUvs+mw~hW<#_t);DWKTB-sc_>zRKTrezuR!80~y^%k=lqqYE7S#8-7r8!^k* zu@c*hu!cZ3ZM2|$xRF&72sAaU>RIVf`2g2N9%~6ACNs@m=ChA#32CY?oRJsOQZWma zc%1tn;!=!N3wm^FY!rGnMkz1+0{;Ffh|Fz;WC!gr9iD1 zMbiLDHYx$kZmpCK5pN38^Ko=!}~{zBHD+P|(rL2v)lL1JiZcvF1V8IYn36ul$2* zPXmQA!6NM!`ns)A&3bQWMdGq~84qeH)gynuzVDvLTL$P*=d`W^bhACum14kqbeDh~ zp_|ifzbq4R#1?r$S+!ur`v(`>1hn)7e!bfAk@|=bIq{*e2U%I=tP_;ClNTT5apGYplxKjaubZEPcYqHo($AKNbHXsNM!Sbn zpuPlK5683&f*%7Glan49m06IMkdz#S^YTh8!&k>)Q5LLLGDaR42)y$IEGJ~K4n6_P zAQODB*FVbU)6jIUwO{nq^e7bh(SKoaEM{_GXoP8VZhQJOYKy0duT_T$K{jZrJ6 zfw0{Fa!$S6Q1uq;)Ke8XJcRIsl`4m)RxJ=gjG{p(&pB06gdp>A&;ucY1u;hA@7ub& z=)8wX@>Vws`(G7+)8uU?3}IYiq!ez$5SjI=<^Du*7r}U{i|A0&xq$EHhYH0#xytqD zzomRr?TY89xoC~YI5bFd*^bjJG7If(LTG|fBTfvJ%7Y=zzH|H1k()r@ou!h> z^c2FBs_5;*M>-Ul@f1okzm#+hBjTc!zu%o>fp8XrgZ zb5XdTUO|K~2l0yP-o+uH3!TxU&-#3XzQP71v?a&Cg6fBP7-@|Vyo7}s6H2Sq;U2fEjE zkDvm0ep{%LSKY)_pu9H|@MIe5iAgYjPT~W35#u0~vye{i^DsWa#1<&+X<|=^;C5lQ zvC7R3vpc9)xY6LLX${jrkI!8K%G_r?vWb#kb?{IwdLZh)@Jt)k!<`T;XvQ36!|x(V zi?#Ip6kOet_Q75*--oAbZC)NW6`Yh@Sd?4ao}73o;S*{qpj zdE1rZ@Zr1()!az|g|P~%@v5CXO^{^wIxodG^0?|%->BW?r@++ABGReRtzteY-tnA{ zqd+k@7@MV1m^)G@97TuKCtcQDW)hApI2*O&|3fM@EF2k?v=R7oyFjHyjE2ZICx8BC z?*5$vD%_%g2AuT}riLuc< z#TDg;X0vJ@E_Km@`d`2|0+puvSj8QxKo3Mhz5?&)bGr2ku-sVsc$fL%W%-zkrwSl< zHeNm$8jpY2H@p-tP+oyKvQkx(#ybswbIwySsZ2k~MU_I6Ni5HdOqI@bAp~`ma%jS* ztH6w0g7yo-tx|--@k4~tnBoOvqrN16SW zgW4D&i&oMCH@wLn4sL>_qU_#o5H|HKzTHwb0SPzB<=lHsSRr_%;GihDqkEyKoD` zNyxD^)TZOfkwRjILv>ZLWU$1oV|{xSc@Yvw_(_QQ_@3CkI0;TezVVX)6oq#;Ew$m0 z=wiI2DSrUtSfM4bC4hdtRR%JpNSTapCqPq(L~#UJK=Y{Zsyo(kuYdH+TQb>O(< zlqymlin{i7`BlJ$PUA-{ov{8{`3uIqUq8R&tlRl^UihlR5|EJ3kg^z+A^@#CYa! zQj!Ntzarvlw}!R_4x<+Q!1HOj#=QEugnA$Yn--geg{2oX17fHD`T@uwL=hO5vemwv zJg>L%GEK5UEeB^VZYHSm&{;AO{9#w|p_Nwa8%ufn0T?e{l0KsD{m8u^$PoMRFuGvg z6TWU7NL!IC%8XH$4Ek7h#N%{^M-Nyur|B`4J52DfD&f4QR9u=b&sS>JR<6HkIi!Kz zi2hR4$qW71Y*l)k+c|f%QPjA1?@azVYkdItQJ>N@S!yDA0?M~)vk=tW7X*wJ+SWaP zFHTE(KDs-)c(nXhvs!U|P<_IALv`qAO7q3$wW8oMO_*o|du}T5T|YvpA{SWFxZTmY z?Jdd7pn=ANeD{7x5FPYjGnwIU(x!T_g81S%{qLIA8R*Bsg3v1$(!^3Qt&p!?N{gm}IRq=E{aKwWGg%&?lZQ5u8=5M0ZLcR} zml9LnJ$$VI$(s4)xf(Xi(pkxBJ^acK&NECv1`US2+@K z!jbFm1H!$Ugs;zF`-Gab#;#&WcaE2pH}SUSP_j411qlldfsuT?ph3a0;kYPrG^<-! 
zLVyoLN@{$P&*?08+pvf{EB~-!R7O$^-aUfi?Gj?Ij{6TvTu7|te^AoX!-VXP3x?2` zMn?OyLt)*cGvh_E(=#Leb45doOP@k*yW#8ojc!A`i&NQ!$hFVV)IruEVXA~|s)~#K zW|#=0))OED2eeX%PM9Du;|2fYD2#s`MN-vFJYgeoQSM1` z-njF^horN>V0+ImOX?}2_jzu>o{HU1d|g z@gsXTU!(Y5E4HPv-`7phyJOMpBzEqK!^h9ymf{MNLhbC$zx>ZKhm!3~hyhk0k1s}S zdg)~NTVGDrh^g{`rI4mx_sJi!U+o&iL^CVXD&MSOyl-77Z<%x&8f7-PerpP2JlH7v zbpJ+_vn!-^>vm?^^d=kx71ajXz_P@Xyh3Ef0mmd ze3o=dp2bKK#@)s`Qi16&r8%&GD^>`!)PkD}i6r12dWu}LI_n@2;uKNu^1b9`B*AwFL-4nvGGQJ0!6bA#Xq zK^qYUHL0p@cdyhD^vOm_6_EE6p)J0v2^*=DWBG(d11~rQlYT@~FKq~z>q?1KiGkWT z_EXg5!b&dpAX{3*HL7Z5ScC`w#)IyPI1W@)RuEZybk(ZiqV{qoCmM@y4=Z|*?jw6& zXAEEdkgeK=haXram+vgt!jDlU<%oXf= zjd?VxL3|vAs)Qk0@Or6x9Y<{?h(R0+fNcFrwGQF#ku92mTo50adK*xj5Z3TZKq)EY zK2^yuC{jv>?nzt0%}0NftyG?2zoUE5>$rA9$|puKgZgf<2YLVov3$A0VX#>KWBVVD zJazs1{h##nha(mLO+T4zpnt7S2>`XUNnYY0<9(Xq9~MKjgeF7<$9SOvtb;r}VNMCor~N%bb0QN13qnG$C3!*FF;$er z0!O5}1rS1^rm}!Y!W|U4;qUYmtTUqT6 zaah`#bPO5(0u3$PU+F*kzTB*uDjGgpi`^Le=7KK317RoatDME=Z9y39{{aeDGM(5F zAqYGU7qxK~xpx8-v|;e)#Sck*oTWWubg>?@YgQ?nhkGIq(`9`-SnfS;)y_O~Az~5s zY*|a0H7LhiTWEz=pva8sPY%St2SQ~`bwmG@C!F)Nbo@#xs201T)ulbUZP4W8meIzX zR8F>obJYG~=6hrcL2z)6d{AtWhcJmMjBF?wEcQAd3f>LV16tP;71XtSyej&J$4(Mf zc=l}w%dQ+f{( zPSwy0?^F8H6Ee5eE=*xxR~n_f6y_O+UKH%k4Md=|(QroNEIJV>*PmzDZDO4X)q89` z7T()cAci)&+rg5=q9S4FY$qS9l6uq*kx;lP4oHNwe@!5E)^H{d*TF05y$AXVu6<2 zf}g~IJ}?`oRcly2j+Zd2U^f=AFBT}BD6VQeo4OfXmcl4qO!Eq)9{ap6vQ0MX%&=-H z4f!Rp4r6o=XfXl-))vh!6eB01jKwV%8%NbB!eO-o8phH5EREBYE8D5L4aYTO>(rWZ zyUf6=6SR`3m(!zip_})DDzR;F@wq1O26Ed>D#ET6z`=x7f#8-5R-2PALXGls)fxf$ ztrlJY3WK=5OCCq5yO^KVi-&0Ge06oP}- zG~e`&j`7ueaPjq;HdRnRVF}+eEERY`{rQXbF2 zssTI7ci6=Nnu?bOc=Sj9@ z9s{y1Dpm0x((Ve_=f-!=Lc4}7CVV-ZzYCGeEe?cDdad!oRwY%8yc7Xfz$9tEJFo@H zpQV9NV3)$?;CPhy&_w^<;T+26GF>xUZjdG#4(56S4Biu?;zRB9RM;_aTcIVgtr34V z(V)-brMXy40u{8TCubSlYfl9vCQ6bPOs(Kn5^8(#tK#t=%k;u6Bb*8>vAEl7EDXai zb@mAVl!N>fQpD8)pTd`cR=Eh7U=YyFa6HLgmXgBb32c2vD4a4{IaT=a?@p?hqa-~; zU$$8)JxX8<&RSBvoRVk26(F@)l3yiJ0&b<;+l=hgk1DOmXlLYbYinJ9etfyN{+wai z2jA!6`4X2TxuEnPA}Oa78s3HJU(R(V$|Lh?CMGC zsG4<=l)!dze4Vz%ve;wm=p1anG36pPyq+Jz88yliqFE7XkTE6{`ucoKi3KitJigp= z=0@teVqPw2CIj%U`h+KSEC11wKRo&0>g0d5(EHD zUnYI16H34#y~)1*1R^UX%+Ji-BNRr7^&>=`Bp!yKM4S&984(3d4uDzN;qCH$66}Is zdd3DsRE0QsK^V;c!3l_i^>hm61Sm|1-d-d$yBgVj5_6bZQap#pMkcV;(=#n$7}&zt z(p=XGPCj`Jtc`9Au5Rxv?PY@w7eCz&$YG3}y-H*X;s3)Epb4CduP#fSs0NEgsEl5fJ6E7xf#wA@dUw=)&B*)yj_$#CwHVJ#y4QFl!8 z!IB(ny|Gtq^dG?8#5+Pb3Z28Go=Ma6U`$F}St`pA7CyGT@N@svB?=E-<lhO5|E@f1c-g)%4-6as`B$nM3B0$`o#9 zb+F`&Vnn)Q({ktR-3z}2cE4=5a%=Sa8F?)?z%P}0uuS{@+>6Mqh(LKZyVZ@gkMe@P zTvcqr#eDuV6Xvk41Va0!0#!i~O$h+I50y2lIInn~7v@OBx#xfV9KLJ?N^8= zO3Eb##*8pg@t!>BYJUImx^FMgr!0BQl9wOiqZP~tmeZUtt^EAPR+MB*a$2&@l#647 zm6etMdi^-w`K(8phTIhrK5D30>|ExHJ)F*LH{s)GCcloUGk?r`F~>0e}+8&W_xm?DZA%b zl_IKzw|2R%>>e!llzR&Xzx9i_a&XLmgCRZ1C|`d+0x5tL=w?qKhr=T6@x-`De=}%k z7#8N57?K14nQKXb#5 zPjnMOB?9Ci%zQ$BCdKWc9DVvbH8c5qNg%b1FcF z2of7wB}+xM|DUiU^q6qD7DVq$k5Uz#wB_SkoJNlB=r`Ika$cG0DML4TZ%}kAxBAwhl`K(e;;AnS&v+`X@ zdgZn62e02{+TFOabTrw%GV(`LfDG&^m4suntv6y+7hemV3=$h{lt+Q9Reh3pSoM+$ z<=5>HRuCi>3cp}B&0?`*)wj&H6uVuA_73;un>XgqI~bK0hX&;s^z8DPG75CXFZjgy zYUnbu6Yrwo(6iR;!8sSRP)<*XBUu0)4oi0L-$jEjBQtIE9)i4r6;&Da$_sm-X3#P5 zB!noH>N@nMJ`9R=ELe^kD|(H`;nsuk^6p2HiE1ehl+`TU`9zD$K^>CMlM%|nL;^EY zLb46)sfwtNfZT4*<+Fzxc}0EE`nIluet|QEpQo32aJl7hyYSi$HEGv4UkPAzeKXC@ zfQQ{6ue>CHr})+TS2P_`>$wf-10^G#G<$^B*#~F5=j1-v#5l53iNE7nvvm)zSL3YcQq`!7K5@OdZe^d>PEMBXgo8Sj2hPG z6GL8=rq~emG*JoanRhRBKVUEv8@f^wU_LL_7_0j literal 0 HcmV?d00001 diff --git a/static/favicon.ico b/static/favicon.ico new file mode 100644 index 
0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8 GIT binary patch literal 21792 zcmeHv2UwKH^Z&l}`q2ag8^(eim7pLN>;*+)iBgRs8VhO=>2e36#%@FrrA3LTNz^C? zMVis5!2-qxh=LspNE9QSl>5!zduQcPN%`ja|Jlb6yKmXq*?DJoXJ_X`h(HX&i9lw^ zR0pBYq1?SYcl~91gciU(Gc)eGgC0V8RtPn1%3U88z{|rC($!VJPa|~L5up{14#Oov zQz1N+j2tm!9Da(yXCZU--ZcXu>CKTt22SuNN@KlNyG}GYm1E;K!?)^yTPKTIXI70F z^-UgWI-;%qFFH>@xo^Dm*I#wEMnyGESP^15Y*rhCJl^uU$&5n}va?G{O1zYr(Iwf}wglz$%P9+ZT$XV*-Ln6V zJ59|HF@_vyId}UqME4|UyB2nKQHa!a=)Y|+A{;4Q-S%=69x?X5t{u9B=x~7k&z{q>evb`!UXIQN-_%iw8 z9Odkvo%bbmujyp)N5?-motgMJGPpe8uIrlgq?i(GbmX$4m$%Cr;^1(DIYSWfqh93> zqtux>eqIK)hc2~UoL(I5u>0_$!gZ52Jesv|?!6IL61Et08`Cmr$4uM1?6CAhi7{To z$0ZabPS1(!*NZoJ3*SuM)6*lvqOj_Rn1UxufAK3lYMI?4bN-=)MFE)$bDv!ZanBkn zJUXU2Cxt4ru6S6G-EXE>TF)Yf{xPl>oJ+i(4+^%v=q>%iIwr{ZbfToyk=emDEsE~X zO7tAv*G*O%y23!`n%|s7cMK*iz4ap7>4;Z)`N4o6;+1J*g7Z#1$n^+(eEE|9ow6Q< z%CaU?nlIY>lLsjk^!(9sx*cja`qJ66`?CyczbL%up8rK^aiQXQO8A5P6@wM@?IAt} z%Lo5n=v+W=vrd^8?;GHxo9ulm&3JtUJGVks<50Y>N^YN9`t^gv;U2fmb55R2PubJ= z`mAQR){N7$Lpw&D%F6bi)U5Nds$P2!&dJKx3o86-zRtES^Ib-YTy7Sm>`VQ0k#!1H zn&q(gme;MTeTuX5OKO(gNRK`caVnzQm}1{9kRY)=GYq(zV-~> z;YNFw#LSMLar>8<<~v(mO}bDyywm3EpLNVGxV$I!(6r#2p(n2$ij2xGSn2LkR$~(# zn|Wuu?%uR(%bOhvU+~L!vDLOyPu2QP`|5b<&{CecYt8+O`=2~&clFjDePFxYs9`^J zUgKZ6WA}@qj9k;QiXN3uO3&yW4PVGpese26wVQb>UY_HcdC#6YUi0_rlu`MOWAKct zzKYVL1K6^awksd5D+xYo$r~SdxqWPWf=x5!s1b8DQ(+s#U-h@Nlp{eZNf zy&-2ty~s{G-uk@NgX0fad#4d*C+?V@J3|;}6h4~ld?o8}+O4bMvF4&kS4X4J8pH9~ zEfiiJ&mS4*pNiZZerxO87Hj)wpW=c|lnx{TGfAL{*xYoo^Nz*?sO( zof*4#7g^oQn6=N!Gh&LR;5(O&UvctqaMt<$D&U};%DXk?A%%>v6eDEfZ4HEwNv`8}d1=5T>S`Pdn=PIm!R_ z$1VFqPWY7Uxos({nU(4GE`^mYqr6Xn&^j z!BU~`9wxe*j-qqs!{p?gH9?uB3y1z0CrU5cVnLiut@X{H_T&qX=X*W6wV3U2E_0m0 z{jJZ&Rm5dW!tFA`d(S@?G;8i=okQ0hCu9UJ>d=2yMf2VVhWr?HtTr!i&H$;mU(5KS z&i8jXCAI(?tNZjDo~|*A{VEyT;L5xQFCr(+Nd4hUpI@uiD3%XQW-@~|Tg)qXc-*Mq zLhln(%Z!JQ3oOn!d2Le3h0O8ugnir!(lU~1-J@5ZTAa9GA-=gXr|PQ3*xW72D5l`c z0g~jD88cCdbRCTMWQ$iF8XsI9QnhkH)o1H>=DmntJ6+*E9Dhj^v~g}(e`Vav zIltY0zOwW{LB#V%R>iqq<&O_snXsx=d49SgbGqO2(FHld!&9Hktcfa5$STZlR@^e+ zyB>bI_8n_m7u7}=Ou5G1zS~xgMn!FQKKJ=PLfoc}$L)m63$NNZOue0G;h|GHDR$dm+;%%GkOve&$!h`kTr16TKXlBO~Yjt>?@3Y@1be&q5IsyzkE! 
zPg{8{{N_;8#a>qIqIn*QbkEcCV9M(~C`~6cDM}uD$y1K#31CxSK5NjMUX93a#b#!s z5y3Z)Ki^u8b`gBOfgL)QW$wc_L$eWijMkMecq(W?^5v#-d3S;~iwPdI9ICO6)fz?e zAJmc?%J9^Atxc-V1jC*ejU4JQWbdFa!a6`1HrMD20N!zHOG>2@HKI`ljXKb%1OE#h zP$(4Wui%A3K>{cZfEK~w1*K4Y6i9d}zqTe3Q9czOlVrmE&@Kh-fR9uWK%oFA>UQfxr(&Fp|qd zNx3|X=J=Bnb@8cfUHgFhBz~tQLZJf@Iz9lQj6Mkc0=GILq(2NHm*WUIWpKGOlqVn4 zj!8!mnwWx+!)}CZ*MZ;K73i1(a3TPl>sxjC<{y@2QA5CESrXTqpwnbexM53Wm|-(z zph0V;xq!7{`9s|GB|DTllJAuIlATIjNg@E}^%{K!wUiX zO_hM%P^}#sl>#6@RX(|Za{>{062pt3H&YomZaf-6kn3y zD=|_olo-62CvK`RqrhuV+B%8ZNF|@$SSbX)SSBEcriKO3)aWHNHA+TP_MrgzD>$xX zDAzb1voM*@tiC5;p&#t}XHd5mml@hA8dQi6B7<{}zb9$k zhjaWgU&^Ds(gkcZ^kE&y0rZG+*~;~hd*|QdAIl#nN~6Efm->>r4}@tq;QdblyRk~h zZmr=nVRs1Bv?Is2HeIZb?yF}XCx7P{t|q0K$MM{S`=AEa0}WL7;1SnXmiG7o>p#q8 z&+#{)i6M(PS}Z=bY;gLE{A0egCG|7aIH5?Q<8U?qFc&A(GSswPoSwb>mfGmld;cHg zU)?U|A18q>pIP?|<{?-=wkUZ_@MVMy+i}lc8}l%rpaKJ+KLjvYu=ssE|9`;z^S1uf zCx4E$mw!o`r52u2t#7J6tg9f%_Jg^{G^`1KEe++1qyItvp`QfFQcls_T)Gx?C?3{= zo|8DPS)oP`j7ZwDFDqQpn-v7wutK>tD_jf+(SpVBj;uE;40IFep7-I_ud2D+`c>Eb z-%$Chz7wp|VFES|#_;MgA-l7dBKyu%^Dn|WfXm%k!Q3z%W91^Lp)yEnr1S&$zXt*E ze38TuYzLM7Q1kcm&y4|^8tN2&gA=0}tU8I+*4e&e< zaCx4%$1B_7&OZ)tW=YFsy zo@2X|B)d;g^Z#-U#(G2(gb*NjA1HW-0V9zl2-co2Nz}-{9{aNa@_)I0>G9dMU;}M} zy}+g#0lVr6Nw(?7@vpJtK3@Ksd3VGs`@cT>`6Kw(*u*OPfh5U}BRJWE9nFiag+0y* z2-Y&@I9Bx$S0L4(Ab&5Qu4=8uG+_Vo-G|GcR?XFTO?qXoY3#Pwb4;1D^%#1(%6=2D zYs!VZ1hAneZNg7MUH;PA3;l$D2z{XZKgNCrx_F;}h1$Xr@1X^-9|jv5_F)^IkVH2J z{7l0?2!g`8{4v1ege3?Gi~}C%|IYWz-!+btH;ebMU^-#hV*61ATn6C%Er!O%=IgNZ z{7h_na^p5m$4eLHoV>hh~w1fl;4 z-fQtyd#?zEkllo3|0hXw8o||h#XrbVSD&Q18hjkOG$E;OP_MHCU%E%bzxMeb%OCFz z1rTG2=WHxPo>&dI7U&R!PBW4)?M{=nU-D?5OqDN%-3WW%h)N+ZPDxV^Uvs>_w3$^M zZb!4@W{3^Wd43$u;yTmC2B#N@o0Q@8*GFPd{VxAlcALR}5|YPI6C?0Go}ng3!QOl; z_DQG#W%unY?Ue%i!yMpOHOC8h>tW6WpAg>n(r()k;zQpx`*zjXQ}N%G5qJbijcWp= z##I2ESAQr1;awl8;fwhaqZbP#1{Jf!O`m}O!4mvelf>Y&;DqByXj)&Z!g;Lf) zENRk0xwJ`3Wq6Zj%Ebm|%8o*|5B47#sQiZsNP_8 zaNdtXzV&35<*EFXEGwX){En{^dSe3)*T{wfoB@#?5ZVNYXp7JWpw|L|eg`5n_7LD8 zLXMDQsIIl-4#^1F??q@JjPovwVSTa(I-oDAgk`Q?H@G=leP5M@fd4#57Sdn@L)}-9 zAxuvuUnY{v)-`(z&EZ-L)|$#?EA+6>Tqg4Y_yIz&|C|Eqnzp z8s*Zc1An6qyuOw-mK!DTu@V5g3z-4z0EDUpg*sB>I#;K?^RLfkYJ2H@@hzH|8XOM; z@2cz|ZSgeq#?1Un-yPeE*lv8UJnM=fdHdd~&`{IJuq8cLhlV!C&qC!J+lI=wb`6zp z>GCF`I?3hh;hg}mxBbE1*4Wx$!>i+J-sY)}q4CZw2v(S5hfuaLLTW#;T|C61WC4DI z7?jsA=>$TflMu364SRXDPmqhN&=7sQ{kmc}-o7_PthXwpNps~$ss2X%Oeze!q$}C3 z+TY=rl2_OT_YMH|Dy5<&xdOiBSO`mbV~_WC`x?IL%lojxCEjojF@Pt6enHP@A2|Pt zsAj0oH&SGu>95dl7ERiR@tD91LSDR*&nzz{2!qzI`-K|PzAYHMK z!S|{pi1x6rd^eHAC$>EBQQ*Bj_!_=_{f)n~eUj)gk@JtK{AX&v8Fvm&5?viIkKl8t zB#_uh!^7X(zQt!6x^L#3Pl(4X&IMm1oNx4=qIs^ZeF8e74R8*)tS_p<7q~q2DB&clOi<@JYrtqR*8M9WWc`+R17WqtD3Jkyr9M-?E zy5tY*&!D>M5JYPmSOd>ve=_*N(_Vk)ZTZ8Yi*O)MyrDig3w3|{N@Dl|*56f_7LQ&8 z=cf_X5K9?Bk=9dR0k6MfmhzZ@^W6Ho`VW$5hJSW~zo{y{-M%i%Gg@ubN?9M4<9Jr^ zOWRBJw{Z4H=tW)ecBRhSP{p(ESKA-UuM7CLE-XvD&;Ag1LnyI6SRk~k2SPtc5mK*X zgA*VQIQv~-n`{jDj(e?zDQ$?l%PM_Pm*3GpxlBeZlgo8J1WROcK4*VMnumtQ^aS(> z4UK*U`Bn?L{F_!>ytJRn-z$^JU)r9y?%VSJhps=^{>J+Wb<95AW7G?H7yEV9`;RKv zf7FHgWA^`{{{CBX{ePw%&3O3#HOGDu_|I%qzsYO#le~YQu5H}br#}=Tc$N%*K|fZq znol4hN%V4p*uyO}^%=z1k6+7B<5n}&XR9C<-=BcERzh_ijwxzlZM0HXt!E4(bex2e z%sJwwB{~Gsr>JS$MEIPRje|4t1f`I-9f0%i;kE{AT+Dx(WL*QSRqNc>bS+f!m&0slvGhrKsM`N2}6T~}Wy9Xy6L!dnYeepzr zBz1<^_yO<_^xdN|eXxz-=QR}6#jyuCW{D5+2YA0^K2*ql4}EZo;;6pt1GdRD`$HTX zgv%rg^2huG{fC@<++#TUX1%!jn(xTjg9MToK}^669PoU2ejd%jrznTN>e zmM-Wg3Gx@I^nHUEH?}nGyc}Pk z{~GaFpG zV|(!U>M~l$?uPN@7LTtrI>nXW{rj5!KjP$r=Lfu>G41m9+XT_3ACHk2aQ+@kC+<1T z7ZIe%1c6TKIDt;`c!5p|;IJAg69l?P=8E-8K|gPD`subs1pL#KTO3U~EK`AUTmio^ 
z_@2)!DT3b{z#QtL=ADam)FY_&GSE%X&zi-Mt5F*QWr*Xc4UjfbdWZ}$eHbQKKcm4X z76oxk_%~Jf_dIbBXS1zVz-)n7zR8<4bCIThcz@J}V#a(zG4_DbZ-O1&o@U0GQG9#w zVUPsbb3CjmYjJ!yO^pdx{l1H|52eU{vk8RfRMi*;9l_HXm$a>GjrTd5KqIIBN{$C5 z4!$x2{Fm)EU=Jsio`D_2#~0?b(9J=>+fERztOQKRRhVb4@tLK!0JkBQGKC5`Ok_0$YZ0DrX=;c+p9#$sFkcrzY#RP;r6y_W z3-ApKMzX+F$fiR5;R0sKEugH9+f=R31fTi(HbaeFs;P|UIVj_GUW)JaWF)>*gzQlq zHz{DSPgBPfOaY)Fg`*L$XQ*Z%T{FxFLcIU0BXcNm6a+BNZY z{aMkf86y2Vu%`a*Ce%3xcHW=PLiP|%cD4szp}r8S0wk^>s^F~7hHtw5sK_I3SvMJ(*k$AS%&NzlenDI=4?{qZs&8V+N&-0fbq4vZC;=aZ>OTH$Am)Oh++xAE za{`y4VF->UBqSmrNSADa?}7aNUVXo1_ra_I`;F5TEIWu-1iPV^3x2mH#SHcZpRSk0 zupEBR3%S7`{!&w=yO5Vk6CFn3I9?tde2GVgv$f97};1{7vw=p2(ndgMi5}h z2o_i|LSL}s=UXwNfc6yp- + + + + {% if fic.completed %} Finished {% else %} {% if fic.failure %} Failed {% else %} Working... {% endif %} {% endif %} - Fanfiction Downloader + + + {% if not fic.completed and not fic.failure %} + + {% endif %} + + +
    +

    + FanFiction Downloader +

    +
    + + +
    + +
    + +
    + {% if fic.completed %} +

    Your fic has finished processing and you can download it now:

    +

    Download {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})

    + {% if escaped_url %} +

    Convert {{ fic.title }} to other formats

    + {% endif %} + {% else %} + {% if fic.failure %} + Your fic failed to process. Please check the URL and the error message below.
    +
    + {{ fic.failure }} +
    + {% else %} +

    Not done yet. This page will periodically poll to see if your story has finished.

    + {% endif %} + {% endif %} +

    Or see your personal list of previously downloaded fanfics.

    +
    +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    +
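
The page above is a Django-style template rendered server-side. A minimal
sketch of how a webapp handler might render it, assuming the stock
google.appengine.ext.webapp template helper; the status.html filename, the
id parameter, and the handler shape are illustrative assumptions, not part
of this patch:

    import os
    from ffstorage import DownloadMeta
    from google.appengine.ext import webapp
    from google.appengine.ext.webapp import template

    class StatusPage(webapp.RequestHandler):
        def get(self):
            # Hypothetical lookup by datastore key; the template reads
            # fic.completed, fic.failure, fic.title, fic.author,
            # fic.format and escaped_url.
            fic = DownloadMeta.get(self.request.get('id'))
            path = os.path.join(os.path.dirname(__file__), 'status.html')
            self.response.out.write(template.render(path, {'fic': fic}))
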
    + + diff --git a/utils/remover.py b/utils/remover.py new file mode 100644 index 00000000..d9aa8249 --- /dev/null +++ b/utils/remover.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +remover.py + +Created by Roman on 2010-06-20. +Copyright (c) 2010 __MyCompanyName__. All rights reserved. +""" + +import datetime +import logging + +from google.appengine.ext.webapp import util +from google.appengine.ext import webapp +from google.appengine.api import users + +from ffstorage import * + +class Remover(webapp.RequestHandler): + def get(self): + logging.debug("Starting r3m0v3r") + user = users.get_current_user() + logging.debug("Working as user %s" % user) + theDate = datetime.date.today() - datetime.timedelta(days=5) + logging.debug("Will delete stuff older than %s" % theDate) + + fics = DownloadMeta.all() + fics.filter("date <",theDate).order("date") + results = fics.fetch(100) + logging.debug([x.name for x in results]) + + num = 0 + for d in results: + d.delete() + for c in d.data_chunks: + c.delete() + num = num + 1 + logging.debug('Delete '+d.url) + + logging.info('Deleted instances: %d' % num) + self.response.out.write('Deleted instances: %d' % num) + + +def main(): + application = webapp.WSGIApplication([('/r3m0v3r', Remover)], + debug=False) + util.run_wsgi_app(application) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.DEBUG) + main() From 547411666d9478b92a576d140e5fa102a7642e21 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 4 May 2011 14:29:24 -0500 Subject: [PATCH 134/482] Move CLI up a level to match appengine, change module packaging to suit. Change mechanism for loading adapters to avoid problems with module init. Move adapter exceptions to own file. --- fanficdownloader/defaults.ini => defaults.ini | 0 fanficdownloader/adapters/__init__.py | 47 +------------------ .../adapters/adapter_fanfictionnet.py | 10 ++-- fanficdownloader/adapters/adapter_test1.py | 9 ++-- .../adapters/adapter_twilightednet.py | 11 ++--- .../adapters/adapter_whoficcom.py | 9 ++-- fanficdownloader/adapters/base_adapter.py | 8 ++-- fanficdownloader/adapters/exceptions.py | 41 ++++++++++++++++ fanficdownloader/writers/__init__.py | 6 +-- fanficdownloader/writers/base_writer.py | 17 ++++--- fanficdownloader/writers/writer_epub.py | 2 +- fanficdownloader/writers/writer_html.py | 2 +- fanficdownloader/writers/writer_txt.py | 4 +- .../newdownload.py => newdownload.py | 28 +++++------ 14 files changed, 97 insertions(+), 97 deletions(-) rename fanficdownloader/defaults.ini => defaults.ini (100%) create mode 100644 fanficdownloader/adapters/exceptions.py rename fanficdownloader/newdownload.py => newdownload.py (62%) diff --git a/fanficdownloader/defaults.ini b/defaults.ini similarity index 100% rename from fanficdownloader/defaults.ini rename to defaults.ini diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index 96090cfe..ec8b55fc 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -5,47 +5,6 @@ from os.path import dirname, basename, normpath import logging import urlparse as up -## A few exceptions for different things for adapters - -class FailedToDownload(Exception): - def __init__(self,error): - self.error=error - - def __str__(self): - return self.error - -class InvalidStoryURL(Exception): - def __init__(self,url,domain,example): - self.url=url - self.domain=domain - self.example=example - - def __str__(self): - return "Bad Story URL: %s\nFor site: %s\nExample: %s" % 
(self.url, self.domain, self.example) - -class FailedToLogin(Exception): - def __init__(self,url,username): - self.url=url - self.username=username - - def __str__(self): - return "Failed to Login for URL: %s with username: %s" % (self.url, self.username) - -class StoryDoesNotExist(Exception): - def __init__(self,url): - self.url=url - - def __str__(self): - return "Story Does Not Exit: " + self.url - -class UnknownSite(Exception): - def __init__(self,url,supported_sites_list): - self.url=url - self.supported_sites_list=supported_sites_list - - def __str__(self): - return "Unknown Site("+self.url+"). Supported sites: "+", ".join(self.supported_sites_list) - ## This bit of complexity allows adapters to be added by just adding ## the source file. It eliminates the long if/else clauses we used to ## need to pick out the adapter. @@ -54,9 +13,6 @@ class UnknownSite(Exception): __class_list = [] -def _register_handler(cls): - __class_list.append(cls) - def getAdapter(config,url): parsedUrl = up.urlparse(url) logging.debug("site:"+parsedUrl.netloc) @@ -76,6 +32,7 @@ sys.path.insert(0,normpath(dirname(__file__))) for file in filelist: #print "file: "+basename(file)[:-3] - __import__(basename(file)[:-3]) + module = __import__(basename(file)[:-3]) + __class_list.append(module.getClass()) del sys.path[0] diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 5e264a31..47b341e1 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -6,11 +6,9 @@ import logging import re import urllib2 -import BeautifulSoup as bs +import fanficdownloader.BeautifulSoup as bs -import adapters -from adapters import _register_handler -from adapters.base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup class FanFictionNetSiteAdapter(BaseSiteAdapter): @@ -179,5 +177,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): return utf8FromSoup(span) -_register_handler(FanFictionNetSiteAdapter) +#_register_handler(FanFictionNetSiteAdapter) +def getClass(): + return FanFictionNetSiteAdapter diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index c70653d9..a4b1ac0e 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -2,10 +2,9 @@ import datetime -import BeautifulSoup as bs +import fanficdownloader.BeautifulSoup as bs -from adapters import _register_handler -from adapters.base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup class TestSiteAdapter(BaseSiteAdapter): @@ -84,6 +83,6 @@ horizontal rules ''',selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
return utf8FromSoup(soup) - -_register_handler(TestSiteAdapter) +def getClass(): + return TestSiteAdapter diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index 27d42adf..dded7719 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -7,12 +7,10 @@ import re import urllib import urllib2 -import BeautifulSoup as bs +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML -import adapters -from adapters import _register_handler -from adapters.base_adapter import BaseSiteAdapter, utf8FromSoup -from htmlcleanup import stripHTML +from base_adapter import BaseSiteAdapter, utf8FromSoup class TwilightedNetSiteAdapter(BaseSiteAdapter): @@ -196,5 +194,6 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): return utf8FromSoup(span) -_register_handler(TwilightedNetSiteAdapter) +def getClass(): + return TwilightedNetSiteAdapter diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py index 7d415654..c848a443 100644 --- a/fanficdownloader/adapters/adapter_whoficcom.py +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -6,11 +6,9 @@ import logging import re import urllib2 -import BeautifulSoup as bs +import fanficdownloader.BeautifulSoup as bs -import adapters -from adapters import _register_handler -from adapters.base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup class WhoficComSiteAdapter(BaseSiteAdapter): @@ -179,5 +177,6 @@ class WhoficComSiteAdapter(BaseSiteAdapter): return utf8FromSoup(span) -_register_handler(WhoficComSiteAdapter) +def getClass(): + return WhoficComSiteAdapter diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index d218576b..85dd8d35 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -6,10 +6,10 @@ import time import urllib2 as u2 import urlparse as up -from story import Story -from configurable import Configurable -from htmlcleanup import removeEntities, removeAllEntities, stripHTML -from adapters import InvalidStoryURL +from fanficdownloader.story import Story +from fanficdownloader.configurable import Configurable +from fanficdownloader.htmlcleanup import removeEntities, removeAllEntities, stripHTML +from fanficdownloader.adapters.exceptions import InvalidStoryURL class BaseSiteAdapter(Configurable): diff --git a/fanficdownloader/adapters/exceptions.py b/fanficdownloader/adapters/exceptions.py new file mode 100644 index 00000000..3f554442 --- /dev/null +++ b/fanficdownloader/adapters/exceptions.py @@ -0,0 +1,41 @@ +## A few exceptions for different things for adapters + +class FailedToDownload(Exception): + def __init__(self,error): + self.error=error + + def __str__(self): + return self.error + +class InvalidStoryURL(Exception): + def __init__(self,url,domain,example): + self.url=url + self.domain=domain + self.example=example + + def __str__(self): + return "Bad Story URL: %s\nFor site: %s\nExample: %s" % (self.url, self.domain, self.example) + +class FailedToLogin(Exception): + def __init__(self,url,username): + self.url=url + self.username=username + + def __str__(self): + return "Failed to Login for URL: %s with username: %s" % (self.url, self.username) + +class StoryDoesNotExist(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story Does Not Exit: " 
+ self.url + +class UnknownSite(Exception): + def __init__(self,url,supported_sites_list): + self.url=url + self.supported_sites_list=supported_sites_list + + def __str__(self): + return "Unknown Site("+self.url+"). Supported sites: "+", ".join(self.supported_sites_list) + diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py index fa435e8b..339680ea 100644 --- a/fanficdownloader/writers/__init__.py +++ b/fanficdownloader/writers/__init__.py @@ -3,9 +3,9 @@ ## This could (should?) use a dynamic loader like adapters, but for ## now, it's static, since there's so few of them. -from writers.writer_html import HTMLWriter -from writers.writer_txt import TextWriter -from writers.writer_epub import EpubWriter +from writer_html import HTMLWriter +from writer_txt import TextWriter +from writer_epub import EpubWriter def getWriter(type,config,story): if type == "html": diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 906338e1..f8866d55 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -6,12 +6,10 @@ import string import StringIO import zipfile from zipfile import ZipFile, ZIP_DEFLATED +import logging -from story import Story -from configurable import Configurable -from htmlcleanup import removeEntities, removeAllEntities, stripHTML - -from adapters.base_adapter import * +from fanficdownloader.configurable import Configurable +from fanficdownloader.htmlcleanup import removeEntities, removeAllEntities, stripHTML class BaseStoryWriter(Configurable): @@ -141,6 +139,8 @@ class BaseStoryWriter(Configurable): outfilename=filename if not outstream: + close=True + logging.debug("Save directly to file: %s" % outfilename) if self.getConfig('make_directories'): path="" dirs = os.path.dirname(outfilename).split('/') @@ -149,6 +149,10 @@ class BaseStoryWriter(Configurable): if not os.path.exists(path): os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? outstream = open(outfilename,"wb") + else: + close=False + logging.debug("Save to stream") + if self.getConfig('zip_output'): out = StringIO.StringIO() @@ -160,7 +164,8 @@ class BaseStoryWriter(Configurable): else: self.writeStoryImpl(outstream) - outstream.close() + if close: + outstream.close() def writeStoryImpl(self, out): "Must be overriden by sub classes." diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index fccc497e..a7d149c6 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -10,7 +10,7 @@ from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED ## use DOM to generate the XML files. 
from xml.dom.minidom import parse, parseString, getDOMImplementation -from writers.base_writer import * +from base_writer import * class EpubWriter(BaseStoryWriter): diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 2d5b8c35..0c040084 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -3,7 +3,7 @@ import logging import string -from writers.base_writer import * +from base_writer import * class HTMLWriter(BaseStoryWriter): diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py index 1abe698f..6b5eaaec 100644 --- a/fanficdownloader/writers/writer_txt.py +++ b/fanficdownloader/writers/writer_txt.py @@ -4,9 +4,9 @@ import logging import string from textwrap import wrap -from writers.base_writer import * +from base_writer import * -from html2text import html2text, BODY_WIDTH +from fanficdownloader.html2text import html2text, BODY_WIDTH ## In BaseStoryWriter, we define _write to encode objects ## back into for true output. But txt needs to write the diff --git a/fanficdownloader/newdownload.py b/newdownload.py similarity index 62% rename from fanficdownloader/newdownload.py rename to newdownload.py index 07bc373e..287c665e 100644 --- a/fanficdownloader/newdownload.py +++ b/newdownload.py @@ -1,17 +1,15 @@ # -*- coding: utf-8 -*- import logging -import sys, os +logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") -import adapters -import writers +import sys, os +import getpass + +from fanficdownloader import adapters,writers import ConfigParser -from writers.writer_html import HTMLWriter - -logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") - config = ConfigParser.ConfigParser() logging.debug('reading defaults.ini config file, if present') @@ -27,13 +25,15 @@ def writeStory(adapter,writeformat): try: adapter = adapters.getAdapter(config,sys.argv[1]) - #try: - print adapter.getStory() - #except adapters.FailedToLogin, ftl: - # print "Login Failed, trying with user/pass" - # adapter.username="BobsClue" - # adapter.password="XXXXXXXXX" - # print adapter.getStory() + try: + print adapter.getStory() + except adapters.FailedToLogin, ftl: + print "Login Failed, Need Username/Password." + sys.stdout.write("Username: ") + adapter.username = sys.stdin.readline().strip() + adapter.password = getpass.getpass(prompt='Password: ') + #print("Login: `%s`, Password: `%s`" % (adapter.username, adapter.password)) + print adapter.getStory() writeStory(adapter,"epub") writeStory(adapter,"html") From 94669a2179ce88dd4f1efde62ac06093e2abda46 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 4 May 2011 17:54:36 -0500 Subject: [PATCH 135/482] Fix up exception handling, first working appengine(SDK) version. 
--- app.yaml | 4 +- defaults.ini | 4 +- fanficdownloader/adapters/__init__.py | 7 +- .../adapters/adapter_fanfictionnet.py | 10 +- fanficdownloader/adapters/adapter_test1.py | 14 +- .../adapters/adapter_twilightednet.py | 7 +- .../adapters/adapter_whoficcom.py | 5 +- fanficdownloader/adapters/base_adapter.py | 11 +- fanficdownloader/{adapters => }/exceptions.py | 8 +- index.html | 4 +- main.py | 224 ++++++++---------- newdownload.py | 10 +- simplejson/__init__.pyc | Bin 12071 -> 0 bytes simplejson/decoder.pyc | Bin 11292 -> 0 bytes simplejson/encoder.pyc | Bin 13938 -> 0 bytes simplejson/scanner.pyc | Bin 2340 -> 0 bytes 16 files changed, 159 insertions(+), 149 deletions(-) rename fanficdownloader/{adapters => }/exceptions.py (67%) delete mode 100644 simplejson/__init__.pyc delete mode 100644 simplejson/decoder.pyc delete mode 100644 simplejson/encoder.pyc delete mode 100644 simplejson/scanner.pyc diff --git a/app.yaml b/app.yaml index bee0c4e6..f8c0d6b4 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # fanfictionloader -application: fanfictionloader -version: 3-0-2 +application: ffd-retief +version: 4-0-0 runtime: python api_version: 1 diff --git a/defaults.ini b/defaults.ini index bc75de44..f0029adf 100644 --- a/defaults.ini +++ b/defaults.ini @@ -55,7 +55,7 @@ safe_filename: true extratags: FanFiction ## number of seconds to sleep between calls to the story site. -slow_down_sleep_time:0.5 +#slow_down_sleep_time:0.5 ## Each output format has a section that overrides [defaults] @@ -87,7 +87,7 @@ wide_titlepage_entries: description, storyUrl, author URL ## Each site has a section that overrides [defaults] *and* the format section [test1.com] -titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags +#titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags extratags: FanFiction,Testing ## If necessary, you can define [:] sections to customize diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index ec8b55fc..63254b4d 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -5,6 +5,8 @@ from os.path import dirname, basename, normpath import logging import urlparse as up +import fanficdownloader.exceptions as exceptions + ## This bit of complexity allows adapters to be added by just adding ## the source file. It eliminates the long if/else clauses we used to ## need to pick out the adapter. @@ -21,11 +23,10 @@ def getAdapter(config,url): adapter = cls(config,url) # raises InvalidStoryURL return adapter # No adapter found. - raise UnknownSite( url, (cls.getSiteDomain() for cls in __class_list) ) + raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] ) ## Automatically import each adapter_*.py file. -## Each must call _register_handler() with their class to be -## registered. 
+## Each implement getClass() to their class filelist = glob.glob(dirname(__file__)+'/adapter_*.py') sys.path.insert(0,normpath(dirname(__file__))) diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 47b341e1..0ef69765 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -5,8 +5,10 @@ import datetime import logging import re import urllib2 +import time import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions from base_adapter import BaseSiteAdapter, utf8FromSoup @@ -50,7 +52,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): soup = bs.BeautifulSoup(self._fetchUrl(url)) except urllib2.HTTPError, e: if e.code == 404: - raise adapters.StoryDoesNotExist(self.url) + raise exceptions.StoryDoesNotExist(self.url) else: raise e @@ -166,14 +168,16 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): def getChapterText(self, url): logging.debug('Getting chapter text from: %s' % url) - + time.sleep(0.5) ## ffnet tends to fail more if hit too fast. + ## This is in additional to what ever the + ## slow_down_sleep_time setting is. soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. span = soup.find('div', {'id' : 'storytext'}) if None == span: - raise adapters.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) return utf8FromSoup(span) diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index a4b1ac0e..fdf9cb87 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -3,6 +3,7 @@ import datetime import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions from base_adapter import BaseSiteAdapter, utf8FromSoup @@ -12,6 +13,8 @@ class TestSiteAdapter(BaseSiteAdapter): BaseSiteAdapter.__init__(self, config, url) self.story.setMetadata('siteabbrev','tst1') self.crazystring = u" crazy tests:[bare amp(&) quote(') amp(&) gt(>) lt(<) ATnT(AT&T) pound(£)]" + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) @staticmethod def getSiteDomain(): @@ -24,8 +27,14 @@ class TestSiteAdapter(BaseSiteAdapter): return BaseSiteAdapter.getSiteURLPattern(self)+'\?sid=\d+$' def extractChapterUrlsAndMetadata(self): + + if self.story.getMetadata('storyId') == '666': + raise exceptions.StoryDoesNotExist(self.url) + + if self.story.getMetadata('storyId') == '668': + raise exceptions.FailedToLogin(self.url,"FakeUser") + self.story.setMetadata(u'title',"Test Story Title "+self.crazystring) - self.story.setMetadata('storyId','12345') self.story.setMetadata('storyUrl',self.url) self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done @@ -67,6 +76,9 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" #

    “It might be all it takes.” He held out his hand and shook Wilfred’s, he glanced at the Vinvocci woman as she knelt there cradling the body of her partner, and he said not a word.

    Disclaimer: I don't own Harry Potter or the craziness of Romilda Vane.

    *EDIT* Romilda is in her 4th year, like she always has.

    Thanks xxSkitten for Beta reading this! :D

    Full Summary: Harry and Ginny are together. Romilda Vane is not happy. She can't stand seeing the guy she wants to be with the person she deserves to be with, with another girl - especially a girl younger that is far less pretty than her. She orders 100 Love potions from Weasley's Wizard Wheezes, Wonder Witch line. Several get to undesired targets, such as Ron Weasley. What happens when Ginny takes matters into her own hands?


    Romilda Vane (3rd Person)

    "Th-Tha-That little skank!" snarled Romilda Vane as she watched Harry Potter and Ginny Weasley from the balcony overlooking the common room.

    "Romilda," said Abigail Stones, one of her friends, "Lets go, you don't need to watch this."

    Abigail stones had long, sleek black hair that was always in a high ponytail. She had pale skin that very few blemishes. She had a long, blocky nose and a small mouth. Her hazel eyes were behind think horned rimmed glasses, and her uniform was in order without a crease or wrinkle in sight.

    "What does he see in her?" Romilda snarled in a whisper, her eyes upon the red-headed fifth year. "I mean, she's all freckle-y and gingery, she's a filthy fifth year-"

    "And you're a fourth year!" Abigail interjected, but Romilda just kept on ranting.

    "…and I heard they live in a dump!" Her nostrils flared.

    "Well what are you going to do about it, just sit and watch them all the time?" Piped up Charlotte Henderson, the second of Romilda's present friends. She had curly shoulder length blonde hair and wore a thick layer of make up to cover up her various large red pimples. Her eyes were dark blue and were surrounded with large clumpy eyelashes. She had an eager expression, like she was witnessing two people on a Muggle drama who were about to kiss.

    "Of course not!" She said, looking away as Ginny kissed Harry. "I've ordered one-hundred love potions from that Wonder Witch line from Weasley's Wizard Wheezes, so once I get him in my grasp I'll have him for the rest of the year!"

    "You realize," Abigail said, rolling her eyes slightly. "That with your luck, you'll get every guy in the school but him."

    "It will only be for around an hour, and I could always just make him jealous by making every guy close to him fall in love with me."

    Abigail sighed, "One, he has a girlfriend. Two, you already got his best friend and he wasn't jealous, he was pissed, and three, you'll get expelled before you can get to him."

    "Sometimes I wonder how we're friends!" Romilda snapped at Abigail.

    "We're friends because you need a good influence around you, or you would be as crazy as Peeves." Abigail stated.

    Romilda spun around to glare at her friend, knowing Abigail was right but did not daring to admit it.

    The silence was broken by Charlotte. "So how are you going to slip him the potion?" She asked, honestly interested.

    "Just wait 'till morning, and you'll see." Romilda said, looking back down at Harry, then suddenly realizing Ginny wasn't there.

    Then, Ginny appeared next to them. She stalked through their group, not looking at any of them. She stopped at the girl's dorm door and turned her head slightly to see them from the corner of her eye.

    "One-hundred? You're that desperate?" Ginny said with a mix of humor and anger. Then, the red-head turned to the door and left them all in a surprised state.

    "You're screwed." Abigail said matter-of-factly. She went into the dorm without another word.

    "She can be so insensitive." Charlotte said, looking where Abigail had left while shaking her head.

    "You can say that again," mumbled Romilda, downcast.

    "She can be-" Charlotte began again, but Romilda held her hand up.

    "That was a figure of speech, pea-brain." She snapped. "Sometimes you can be as dumb as that Loony Lovegood." She then stalked up to her room with one last pleading look at Harry, whispering fiercely under her breath.

    "You will be mine…"


    Isn't Romilda Pleasant? ;] xD Oh she's crazy, insane, envious, has stalkerish and man stealing tendencies. and that's why she's everyone's FAVORITE character.

    Also Romilda's in her fourth year. yeah. oh an NO FEMSLASH geez.

    Also, Abigail Stones and Charlotte Henderson are to OC's that i made up on the spot because even crazies need friends. Ones the ignored good influence and ones a stereotypical dumb 'blonde' (NO OFFENSE TO BLONDES! I'm blonde and I don't take those things that personally unless their clearly mean that way. Also Charlotte's Muggle-Born so she watches all those Muggle TV's shows were all addicted too. ;] .. )

    The rest of the story will be in Ginny's point of view whether its 1st or 3rd Person IDK yet but probably 1st person. The pairing in this are - Harry x Ginny / Romilda x Harry / Ron x Hermione (hints of) / Charolette x OC (Undetermined).

    Reviews = Something... GOOD!

    ~ Sincerely MNM

    #
    ''' + if self.story.getMetadata('storyId') == '667': + raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) + soup = bs.BeautifulStoneSoup(u'''

    Chapter
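
The sentinel story IDs added to the test adapter in this patch (666, 667 and
668) exercise the new exception classes without touching a real site. A
minimal sketch of driving them through getAdapter(), assuming test1.com story
URLs take the host?sid=NNN form implied by getSiteURLPattern; the loop itself
is illustrative:

    import ConfigParser
    from fanficdownloader import adapters, exceptions

    config = ConfigParser.ConfigParser()
    config.read('defaults.ini')

    for sid, expected in (('666', exceptions.StoryDoesNotExist),
                          ('667', exceptions.FailedToDownload),
                          ('668', exceptions.FailedToLogin)):
        try:
            adapters.getAdapter(config, 'http://test1.com?sid=' + sid).getStory()
        except expected, e:
            print 'got expected error:', e

666 and 668 fail during metadata extraction; 667 fails later, while fetching
chapter text.
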

    diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index dded7719..f3d64064 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -9,6 +9,7 @@ import urllib2 import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions from base_adapter import BaseSiteAdapter, utf8FromSoup @@ -74,7 +75,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): if self.needToLoginCheck(d) : logging.info("Failed to login to URL %s as %s" % (loginUrl, data['penname'])) - raise adapters.FailedToLogin(url,data['penname']) + raise exceptions.FailedToLogin(url,data['penname']) return False else: return True @@ -88,7 +89,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): data = self._fetchUrl(url) except urllib2.HTTPError, e: if e.code == 404: - raise adapters.StoryDoesNotExist(self.url) + raise exceptions.StoryDoesNotExist(self.url) else: raise e @@ -190,7 +191,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): span = soup.find('div', {'id' : 'story'}) if None == span: - raise adapters.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) return utf8FromSoup(span) diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py index c848a443..b25a9785 100644 --- a/fanficdownloader/adapters/adapter_whoficcom.py +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -7,6 +7,7 @@ import re import urllib2 import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions from base_adapter import BaseSiteAdapter, utf8FromSoup @@ -44,7 +45,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): soup = bs.BeautifulSoup(self._fetchUrl(url)) except urllib2.HTTPError, e: if e.code == 404: - raise adapters.StoryDoesNotExist(self.url) + raise exceptions.StoryDoesNotExist(self.url) else: raise e @@ -173,7 +174,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter): span = soup.find('span', {'style' : 'font-size: 100%;'}) if None == span: - raise adapters.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) return utf8FromSoup(span) diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 85dd8d35..d4233d08 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -9,7 +9,7 @@ import urlparse as up from fanficdownloader.story import Story from fanficdownloader.configurable import Configurable from fanficdownloader.htmlcleanup import removeEntities, removeAllEntities, stripHTML -from fanficdownloader.adapters.exceptions import InvalidStoryURL +from fanficdownloader.exceptions import InvalidStoryURL class BaseSiteAdapter(Configurable): @@ -29,6 +29,7 @@ class BaseSiteAdapter(Configurable): self.addConfigSection(self.getSiteDomain()) self.opener = u2.build_opener(u2.HTTPCookieProcessor()) self.storyDone = False + self.metadataDone = False self.story = Story() self.story.setMetadata('site',self.getSiteDomain()) self.story.setMetadata('dateCreated',datetime.datetime.now()) @@ -58,13 +59,19 @@ class BaseSiteAdapter(Configurable): # Does the download the first time it's called. 
def getStory(self): if not self.storyDone: - self.extractChapterUrlsAndMetadata() + self.getStoryMetadataOnly() for (title,url) in self.chapterUrls: self.story.addChapter(removeEntities(title), removeEntities(self.getChapterText(url))) self.storyDone = True return self.story + def getStoryMetadataOnly(self): + if not self.metadataDone: + self.extractChapterUrlsAndMetadata() + self.metadataDone = True + return self.story + ############################### @staticmethod diff --git a/fanficdownloader/adapters/exceptions.py b/fanficdownloader/exceptions.py similarity index 67% rename from fanficdownloader/adapters/exceptions.py rename to fanficdownloader/exceptions.py index 3f554442..44cae238 100644 --- a/fanficdownloader/adapters/exceptions.py +++ b/fanficdownloader/exceptions.py @@ -14,7 +14,7 @@ class InvalidStoryURL(Exception): self.example=example def __str__(self): - return "Bad Story URL: %s\nFor site: %s\nExample: %s" % (self.url, self.domain, self.example) + return "Bad Story URL: (%s) for site: (%s) Example: (%s)" % (self.url, self.domain, self.example) class FailedToLogin(Exception): def __init__(self,url,username): @@ -22,14 +22,14 @@ class FailedToLogin(Exception): self.username=username def __str__(self): - return "Failed to Login for URL: %s with username: %s" % (self.url, self.username) + return "Failed to Login for URL: (%s) with username: (%s)" % (self.url, self.username) class StoryDoesNotExist(Exception): def __init__(self,url): self.url=url def __str__(self): - return "Story Does Not Exit: " + self.url + return "Story does not exist: (%s)" % self.url class UnknownSite(Exception): def __init__(self,url,supported_sites_list): @@ -37,5 +37,5 @@ class UnknownSite(Exception): self.supported_sites_list=supported_sites_list def __str__(self): - return "Unknown Site("+self.url+"). Supported sites: "+", ".join(self.supported_sites_list) + return "Unknown Site(%s). Supported sites: (%s)" % (self.url, ", ".join(self.supported_sites_list)) diff --git a/index.html b/index.html index 499d3be8..51d6cd57 100644 --- a/index.html +++ b/index.html @@ -65,8 +65,8 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
     EPub
     HTML
-    Plain Text
-    Mobi (Kindle)
+    Plain Text
+    For Mobi (Kindle) select EPub and Convert it.
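
Both the web handlers below and the CLI now share one pipeline: getAdapter()
picks the site adapter, getStoryMetadataOnly() validates the URL and login
cheaply, and getWriter() serializes the chosen format. A minimal sketch,
with error handling elided:

    import ConfigParser, StringIO
    from fanficdownloader import adapters, writers

    def download_story(url, format):
        config = ConfigParser.ConfigParser()
        config.read('defaults.ini')                    # plus appengine.ini on GAE
        adapter = adapters.getAdapter(config, url)     # raises UnknownSite / InvalidStoryURL
        adapter.getStoryMetadataOnly()                 # cheap up-front validation
        writer = writers.getWriter(format, config, adapter.getStory())
        out = StringIO.StringIO()
        writer.writeStory(out)                         # same call main.py uses
        return out.getvalue()

Splitting getStoryMetadataOnly() out of getStory() is what lets the web
front-end report bad URLs or logins immediately instead of waiting for the
task queue.
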

    diff --git a/main.py b/main.py index eaa41d9f..e2625b31 100644 --- a/main.py +++ b/main.py @@ -15,10 +15,15 @@ # limitations under the License. # +import logging +logging.getLogger().setLevel(logging.DEBUG) + import os +from os.path import dirname, basename, normpath import sys import zlib -import logging +import urllib + import traceback import StringIO @@ -42,6 +47,9 @@ from fanficdownloader.zipdir import * from ffstorage import * +from fanficdownloader import adapters, writers +import ConfigParser + class LoginRequired(webapp.RequestHandler): def get(self): user = users.get_current_user() @@ -104,29 +112,29 @@ class FileServer(webapp.RequestHandler): name = fanfic.name.encode('utf-8') - name = makeAcceptableFilename(name) + #name = urllib.quote(name) logging.info("Serving file: %s" % name) - if fanfic.format == 'epub': + if name.endswith('.epub'): self.response.headers['Content-Type'] = 'application/epub+zip' - self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.epub' - elif fanfic.format == 'html': + elif name.endswith('.html'): self.response.headers['Content-Type'] = 'text/html' - self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.html.zip' - elif fanfic.format == 'text': + elif name.endswith('.txt'): self.response.headers['Content-Type'] = 'text/plain' - self.response.headers['Content-disposition'] = 'attachment; filename=' +name + '.txt.zip' - elif fanfic.format == 'mobi': - self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' - self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name data = DownloadData.all().filter("download =", fanfic).order("index") - # epub, txt and html are all already compressed. + # epubs are all already compressed. # Each chunk is compress individually to avoid having # to hold the whole in memory just for the # compress/uncompress - if fanfic.format == 'mobi': + if fanfic.format != 'epub': def dc(data): try: return zlib.decompress(data) @@ -230,18 +238,47 @@ class FanfictionDownloader(webapp.RequestHandler): download.user = user download.url = url download.format = format - download.put() + adapter = None - taskqueue.add(url='/fdowntask', - queue_name="download", - params={'format':format, - 'url':url, - 'login':login, - 'password':password, - 'user':user.email()}) + try: + config = ConfigParser.ConfigParser() + logging.debug('reading defaults.ini config file, if present') + config.read('defaults.ini') + logging.debug('reading appengine.ini config file, if present') + config.read('appengine.ini') + adapter = adapters.getAdapter(config,url) + logging.info('Created an adaper: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + ## This scrapes the metadata, which will be + ## duplicated in the queue task, but it + ## detects bad URLs, bad login, bad story, etc + ## without waiting for the queue. So I think + ## it's worth the double up. Could maybe save + ## it all in the download object someday. 
+ story = adapter.getStoryMetadataOnly() + download.title = story.getMetadata('title') + download.author = story.getMetadata('author') + download.put() + + taskqueue.add(url='/fdowntask', + queue_name="download", + params={'format':format, + 'url':url, + 'login':login, + 'password':password, + 'user':user.email()}) + + logging.info("enqueued download key: " + str(download.key())) + + except Exception, e: + logging.exception(e) + download.failure = str(e) + download.put() - logging.info("enqueued download key: " + str(download.key())) self.redirect('/status?id='+str(download.key())) return @@ -289,120 +326,67 @@ class FanfictionDownloaderTask(webapp.RequestHandler): logging.info('Creating adapter...') try: - if url.find('fictionalley') != -1: - adapter = fictionalley.FictionAlley(url) - elif url.find('ficwad') != -1: - adapter = ficwad.FicWad(url) - elif url.find('fanfiction.net') != -1: - adapter = ffnet.FFNet(url) - elif url.find('fictionpress.com') != -1: - adapter = fpcom.FPCom(url) - elif url.find('harrypotterfanfiction.com') != -1: - adapter = hpfiction.HPFiction(url) - elif url.find('twilighted.net') != -1: - adapter = twilighted.Twilighted(url) - elif url.find('twiwrite.net') != -1: - adapter = twiwrite.Twiwrite(url) - elif url.find('adastrafanfic.com') != -1: - adapter = adastrafanfic.Adastrafanfic(url) - elif url.find('whofic.com') != -1: - adapter = whofic.Whofic(url) - elif url.find('potionsandsnitches.net') != -1: - adapter = potionsNsnitches.PotionsNSnitches(url) - elif url.find('mediaminer.org') != -1: - adapter = mediaminer.MediaMiner(url) - else: - logging.debug("Bad URL detected") - download.failure = url +" is not a valid story URL." - download.put() - return + config = ConfigParser.ConfigParser() + logging.debug('reading defaults.ini config file, if present') + config.read('defaults.ini') + logging.debug('reading appengine.ini config file, if present') + config.read('appengine.ini') + adapter = adapters.getAdapter(config,url) except Exception, e: logging.exception(e) - download.failure = "Adapter was not created: " + str(e) + download.failure = str(e) download.put() return logging.info('Created an adaper: %s' % adapter) if len(login) > 1: - adapter.setLogin(login) - adapter.setPassword(password) + adapter.username=login + adapter.password=password - if format == 'epub': - writerClass = output.EPubFanficWriter - elif format == 'html': - writerClass = output.HTMLWriter - elif format == 'mobi': - writerClass = output.MobiWriter - else: - writerClass = output.TextWriter - - loader = FanficLoader(adapter, - writerClass, - quiet = True, - inmemory=True, - compress=False) try: - data = loader.download() - - if format == 'html' or format == 'text': - # data is uncompressed hence huge - ext = '.html' - if format == 'text': - ext = '.txt' - logging.debug(data) - files = {makeAcceptableFilename(str(adapter.getOutputName())) + ext : StringIO.StringIO(data.decode('utf-8')) } - d = inMemoryZip(files) - data = d.getvalue() - - - except LoginRequiredException, e: - logging.exception(e) - download.failure = 'Login problem detected' - download.put() - return + # adapter.getStory() is what does all the heavy lifting. 
+ writer = writers.getWriter(format,config,adapter.getStory()) except Exception, e: logging.exception(e) - download.failure = 'Some exception happened in downloader: ' + str(e) + download.failure = str(e) download.put() return - - if data == None: - if loader.badLogin: - logging.debug("Bad login detected") - download.failure = 'Login failed' - download.put() - return - download.failure = 'No data returned by adaptor' - download.put() - else: - download.name = self._printableVersion(adapter.getOutputName()) - download.title = self._printableVersion(adapter.getStoryName()) - download.author = self._printableVersion(adapter.getAuthorName()) - download.put() - index=0 + + download.name = writer.getOutputFileName() + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.put() + index=0 - # epub, txt and html are all already compressed. - # Each chunk is compressed individually to avoid having - # to hold the whole in memory just for the - # compress/uncompress. - if format == 'mobi': - def c(data): - return zlib.compress(data) - else: - def c(data): - return data - - while( len(data) > 0 ): - DownloadData(download=download, - index=index, - blob=c(data[:1000000])).put() - index += 1 - data = data[1000000:] - download.completed=True - download.put() + outbuffer = StringIO.StringIO() + writer.writeStory(outbuffer) + data = outbuffer.getvalue() + outbuffer.close() + del writer + del adapter + + # epubs are all already compressed. + # Each chunk is compressed individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress. + if format != 'epub': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data - logging.info("Download finished OK") + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=c(data[:1000000])).put() + index += 1 + data = data[1000000:] + download.completed=True + download.put() + + logging.info("Download finished OK") return def toPercentDecimal(match): diff --git a/newdownload.py b/newdownload.py index 287c665e..49c12697 100644 --- a/newdownload.py +++ b/newdownload.py @@ -6,7 +6,7 @@ logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lin import sys, os import getpass -from fanficdownloader import adapters,writers +from fanficdownloader import adapters,writers,exceptions import ConfigParser @@ -27,7 +27,7 @@ try: try: print adapter.getStory() - except adapters.FailedToLogin, ftl: + except exceptions.FailedToLogin, ftl: print "Login Failed, Need Username/Password." 
sys.stdout.write("Username: ") adapter.username = sys.stdin.readline().strip() @@ -40,9 +40,9 @@ try: writeStory(adapter,"txt") del adapter -except adapters.InvalidStoryURL, isu: +except exceptions.InvalidStoryURL, isu: print isu -except adapters.StoryDoesNotExist, dne: +except exceptions.StoryDoesNotExist, dne: print dne -except adapters.UnknownSite, us: +except exceptions.UnknownSite, us: print us diff --git a/simplejson/__init__.pyc b/simplejson/__init__.pyc deleted file mode 100644 index f01003d4f81d37513d0f8a2a5fb857b8448ae2bd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12071 zcmeHNL37+jc5aXoC52lM??!ES`^Va5uF#&l87#d{Ux!3 zn-|3n?q3p7OB|dN$$7DJUN}^KM;t7P z_xSsL5nU6}&*=IaadGDz>Vo#>kD8f3w86!7@%u|+hsD0O&EIev42oGpIC}l95x%f< zrIyx+#l_FX@3E|_XH|W`RXp2m??ckj2g^eIdi&8s>HRu*9&Cq2oR{*^@Rna)x_EB5coSj#}_YN%Byvr%iNvp!DC;7EE8?)~QTmG#@}@@5f9 z6~#tUrBx&Y>YT*;?9r*L2+zFO@cy?gJgjIku=it zI6O$yKw_vWQQDVVC9RKys3XiN4U*(oP6A929~HHpV;JbA9?3{Cv$KQAFtd$ioXRhc z%Q2d-`?tGtSe1<^-3qfw4jm7%gz{J(#^re0_!dvG>H9Gky|5|@m6pkIM~(yC((!&8 zkK!;$OPQ;J^_GT82GMie3ig%mO7&c&EIY&4m5$SWUR##amIR5s*P>;nyd(&aI#(*H zat-xANW(0m4#PmlVLi9Z*vB|lP;7`Fy|K}1N&LHe7p5`Ev!ayKJ)`|5?KCaejG}6i zYj4*bWtrQRFWg~JxEs>L@7E|l%u>~rYyL-Fx!yT>+Tp(LZX2!JXx&EZ_J-WW@7E}& zRg%=LpPoE*o00MYo5q9tX1w+uiP)p=M&`_o*Y~R2y=ra!<}J7G!=?7?JGgs$P20Wi zX!oKWVi{OuduV?H`aS7N4ITCm)Un=tTvW=8`=ZUYGp)JzNi&a8kxk@wiAC>kJ*qdN zE;p^>Ol~%eL#+kpb%I85+Dcc=8GuNYyJ!st{ z8`R+21;w0HWLzV4KEj*5=_9?0)o==6&jfOlQ&B&Q%x(N&GdP9*(Wn zUq*IYzTb{SY6J(`r$~{gBQFZe&IXU>`#zb1j7QS#*Y*9rOZJ0S^Npw>48JN;gr-K) zu8UKi(D6oxT{oTt`>wUMTDt9o`g&0QZTyAZ@)zxyDZmy>Y$yB_iAQM-mn0mQ>nE-; z+j;<_oc=h=4mPLjG|KnZe!2c^x(_z8K#vfXoH>s*e+|(CPC={w2y-hpZEGKgf_kw5 zox10_)Xj{;cKG@|d^=x8d&oUiy-yyN{pvo(2+o9CLPho6daF(~oY~7=H1kQxT{=iU z>DU~}TDwIMYb75a=juUGWQA9#yzsJ){H1IY#!0i%m?)4F+iWmQl#PrKF|T41LD$iD z4Rgbqf+{ID=htPF=mhd|eQ&^Hzw1I*0} zKSO$}^@JhP6u_e~QbSX{Pn zImHbun2iFO;RRRaXyw!L0$N!E4QW|4!u$CA3qJbK_FN%{Tkvir!S~ADQoIrCiB`{sg2xOP$a6!=E7X^)aNnkh(@~yZy zEC1+2@p4(*e|k-vTox}eHl$ppv7N8}UHlFc-}Xou`VUagwsjdw4hAsn0VoymI*xdT zzm?#61{TtB84N}_8hHWR@nGN|7C4YzXE0dN6%3+a+Z@G-T1nyqMGg)2+5rn8hqpC? 
zfZ6~ch6oyB3^AD$HBlUvxJ%Z7TR|y6_SjB#L0mQp7&_~?TDhn!KM>;Ms%#-y9n0V`o9<)gl;VvG->!xMNIJT&8OrK31S zV#Cg2TAWjiamz+40qitgN!31*BF_~DFV(&(?1|v%1w{dqSBaYNaI{&*QShWDYBHo8 zP`#(KQ5olx6D;f=%%CzsZY1&L=P8CFqGoQeDCqbjBPUCd{(&A4&}6C(nUncYz3~Kf zs%VmFqf?^11hbTeKu(~|n(#F8*cFond2oc2ep3Z-g$;?Aaz(gL;9?SVG_VUddlDT zC1Qh_Wrmp)aRm1QrC3e-_2OtFCJH&hh`f2d0Jyx!q)FkY*)?_G!TB`8{K!R=hk`JsN_&yRZ~ z?rVJX-_cd~s&mD0owjpr;j+_m-pBuC=lbFWvE+obaMD);`~D3DfTHCr1Ka{=8{-G) zFTfAb-wu9&wG6oX4GIlT`wWE!1KvadFwDTde?$Rj%=GyHg%+g0HxK)^8QnFKE$BOM zKp$=c^x@gYB11pGH9$XrK0rT!K7PnSLj&|-On^SdeY2p?0=hrLVzyrabg<4>0G)*n zb3V=Da&urA#E{vOXZ!2KWi~0oF}H~|HLjhaA9BhZU*72mP19r zQt2=t%t?EMM&#qDHV8qBGT71omI$hO zJ7ORE3JXl4EUbtMBAGy7#Xc@KYnjGDW+r$f&zuo%%I2a#Kg_mSYS;u)WQ`D7xsGCO zhW-VQ3JQ`+&JbF}pMc?|D{Fx2jCfER8b3M3F~Gyp02_%<=CFw+g@4kP=<>IQ4XJz3o7aY3LxcX7!n3JEHD|%d%5jwB8HTX zEc__%c=bX-Ozg8} z(jz|C!@(7t?B`QTUZF>?Se+8yyFvHpouVEQtG|dZqv`w?KAI*Wu3u%c*7z#&$?U5Z z;qxdDzQhT5qGj+@ra-i8u`UoT4}yxX`%wrGH@r;hiKV_*U_?O7)#3*3@tNb zToTz8bOtp81#`q5CURyFTLr#`ss&qRCS$8W;!w3{tITX{l}YEHvsIV^+B($yTj4a> zeCv1r15|z9?^4=;PYQDdI_@(^yGhc_xLdeYvR*c?H*>e(A@;08wi%}3LgX;s(C_ky z?0aG6ukpKY=5|>Uxn9oJ{}8v!lIn*G*6p71x-)KbR1tZ z^&43~xy3qq%(yFxO?cO#37Xx8d7p|GoZz-70r3Kfq2ky+mZD@i0R=d2yGy?Of&v{s z%Z4q%GRZibgf<%Uj&qvbODkk)%YuUonw<&(2nDbNW3Ti^Bjxbuc<~wxe4ytKoKIzW zb!@;?=+y)t{+?e^XpE$B&OgH1;o{$F<>>b#t{c)QqhGI_)ldh)!C*f8yxF2D<%(TK z95fr1(KBBHZN%B}NOwc7)DIrIa(ab_6!m+9=3Ny|TbZOd$NE#7YjVhNLh2|~d``(2 z;}kidI5K+*>!61ZjfWsqRnbeW0C>ip|3YD&g8S+COF7<$Uc*!7Q@h6Y3+656+B|Nj zxQZvvh_L?;(VcL{6%4Nb0T-g}Iyao_j^Qbnk))mdJoMf}6MjbD?;|Aj`;2Y+eVhMB znMg;!4+o8F;##<_kZ~_;mDK<*o7*x3R>d^E{VRGF%eaSL37V2UP9XP4Nj;iqufa$j zNvlbN??bUIMMVdWXMnUth%ajb-P5 E0OpUxq5uE@ diff --git a/simplejson/decoder.pyc b/simplejson/decoder.pyc deleted file mode 100644 index 2ae9b3591ee9c6400d5cd09eb0a05999ef680bdc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11292 zcmcIq&2tmkcE2sjACi&37_iM}!weOf0h^grg(*VjJp*RO4@U6XFwFBak=p8(-B{|D zyIWwr$eTbG*{8C4o2;@-Ro*I9S){Vde~_xAD%-5H%x3cYo!i}#{XArtI09eY&vVZ` z=iKvk^}GKnmp=LJZ`T?s`IPbfbNogBXei|K(wGQ9BBIomM%g zsO}crSyW1_%%C!%?rxlw9p;pSt)|pl;4v>toH8a%Vx}{N$}}rnGRq2~eZplA;DT$P?H3UqU?swTMdF{uF3{A%N4Zo6Ljgj~FYVy)e2 zxoy{v?br)^GiaK>SbtJ|;5Gus4NKnl_*5(4wu(=oXxqDPEo#`l?}oa!L_p|>;?VQA z;|Vl}0Epz|(Km*_`<&WE0Xgwd=G0q59RM<#;7(q>%B$d(GWr-RC=>F<|0C)z@@hf% zT|U$|hqb<;j5x2n0)+sRz^%=kIsn|9>Km$$GN&e*c^sK2RAUq6?S~J)xHa@ad#H8m z762->rtLSq#v6BQIr7NLS5e?EYplYhPA{GUj9Rl5*l{BOldw-TgRY2tQ^u4rZQL^E zjRwzEz#k7Ld|F2Z^dz6;R)d&tag+0Ej+E8x%Bi3Pc7fP=<)4!pmtg+Tl6ef04SCh1 z;Bk{`M1d*+w#>$HxqZyN8$s9C)PguU?!L?y(l|jwzB@rA)N6UZ>j%lAmLHrj-6$>G zq7k&tqnglb!6+N0+=9{IG01F6hR|ytl^7%wnM=2d3=!+kmbMrmx%$r;+T(Zvcpu)w#cPpfUu%#VsTeE0br_&zScdU=tb0+O+ zwqv;=7)iKC@}UJxPn?0goo*NcuTM~mCQ+!|8KayZg}k3Z84>rJYq&nX7x!YKk~Mfo zt9CUa>XoJIKU=K)`s(K9=Kif)H#he`Yrb6UQi88SbD@;_rpWZB)R(1yS?c3b-;(+% zsW+uwlDaSTveZMV7o{FYybRsAdGm_~4??0+wTh_y9N6&&&Wlvx24V>=B_(KECBg!Q zdH^wzt2hrS+W+qyg-rwu(s@u+{bCB?f~ZjN6r#o%JqYVKq5le1X(r{!7;2OlVKOq_)Dg+L}Q~!zXDTobN zK?9~>5%&c+Ptby$Jvc=<1Csqy=`m6y_pTU5AHXMww6XVxvRiL5nYM6HRxkWdRRN+$ z@8Ys}7aG>Vf$vh2n2s90sky`LJ>5635^Tj)KDu zsJ0|4q_Et$8~T`Q#$EU$38fNToaEqg1TG(@=3pnGq(B=`6J|nFbO{ayct>mZ+y}aC z&$mh2lz${OTu~6hGC7g6H)Z--LIrq2hfii+1IJ_0yj^jR(wUo@_IFHZK=>*&dtwu^ z@$Pr{BpmXlIY<2~itqo8#rK%~G5QYA-qdTNF1WT0%*~Ds=Z^kHL^o)&>rvCgIf7eE zFU?c`j)GQzzO$-U#LUj28gs)3s}EP6KYmtQt=@m~V6FOmr&K z1z+ZDhe5X!$s8~8#5ZiX(Bs(0_vrr1&>Fok5iqfkkKCSakQAZY3B~xg5I^M6;rX#e zJ1Q&N_I?UTkO$D6P&|iMf#+AmPY_OOl(Z{=eHlh@0r?5fnKY?w>94j7GWD(LvU zf`JqC&lys8dJwn3>uA8N%WPOF2>M&B$I7GY#i#%V0z_SKGiK~6RC`}n{U;Yq&vQOGBg2ROmu*XPY} 
z`pP(hQ}36_oP~r=cd=E5v0k1IT^z!K)E*||-rs1*FZ|PRRe&{oo-uJ%#0BXsFd|8? z(ZTX5o>sd^;LyND(&Pl*hf&v_4EzLe1L>09y%cZF@&x-ocn^xnVG_SkQa=fzN$O`@ zd`5A_1{BC7K^(>MHW>s4U93jDDF;Cd)9I<#I2#}sEUCU<0mHVAfmBY-S4;! zF^nCg5RjednZDh2WyT#bYJ(YfV6IzUo6bN)8#5RQwq5}W0$nIjD(@RdR^Aw_tWcwi zZnv(pM(ZP5^jX+-EiSf}*hST{9tWK@x*kkISwy$OZqfQnx2)T!TMS(#e1x;)*8+G# zl5Nr$)pmklR~v~pqPuHvBI}R1Oza)40?llcDS6TORPwuSPmBf}q^^6y%ANL??%orj z+Fzj+?W4%6qA{AEfzB;M51+yJENUk8yc#pk7Z9H!Mm2M#T;7;L?~HK`T6zSn1$;2XoBhJrE{ zU{GwdGDRj*w1xs20}WMR(g^k;nVc$Dt)XC1g9L~P{pJwRs8_)p&_dJA0qhPKPRew?=PY1weHrc9v#MaEM7nz1GMM{ zI#pUX$kly$?A5j&Vb*&tHJt%{sTWy$%;Fms*&UZ#U7?s+RTfWJ{E~&mVsI&mkKDS* zdd88vEWSl?!ncrl9^S&0FtmIBfkt!>gYx;Eae)2 zA|KL(AfpE;T)ade+(ijr1WXfI~(x*y^F)w7aeNjt21Q&BLFgZH|5_TlFn} z`bj<_ZT~cweix~f?9(@^NLFV}seA~YBFbwqavxCucH?b_FDXuB*6YX;*XuV>Npr_+ zF+o$WXSwBi-LxZtfs}_AA~7F^Zqx~U#=4j&jKL(W3~!ikb`UBvGZS><$a5q&qi?Bfdh}-}lO{DA$yzLN^s0m-IUPq!zB6*w&i3CZ~ zAX~+ZPdZ|DBPPTGin<$iLKo!qd!PocSG_LOBA>KUh!R!jP;13IAWfREFe{QJAJS%v zOQs24?bwm&2N@i<*@WW?(jv>1sla+Yxw}9!*(jkdx2|UCy9RPKNF%g*X5gbpkH7;V zxhGjMGwQ;rj&KU323OYaudSJHf^hdnhJe-@;AztgK!#p92gIvgK`dL4{-Mqa^RCrT;95r4d< zR5!X&9JDh)j*FL0582P(B*v=ZE^RMSLO|M15XOS*P3!4>v(W;b%pFJ$F3{p3;&f~< zK|gr@c;aZ@WSAT#j#isM14l?Gv}E4PjAtT_k%{!IhZ_tz0nQmLSzUp8uVE92=5Yi8 zVL)>&6^@OU_+AGT4IvFUi%#rP9+{Vxo`tJ)*iez-Tj8!kzZWk`g%!TT9~Un0c|b_hT4S+2k+u;=sH8fo@9b0IDQJ%D`Bh< zK?38Un_zv;Vfw+k6NHnLXzl)!>iV;l>a%rOQLAmcF@pYftp+`xPEM&O0l4*okoZ#& z4M}Sb3qJ(nGg#UyUYCUULsK>B7z5~-Kz@)R34zFMHFb>7V_2IkUb3jOVBCMg5dkw3 zwHn`jYqcu~fg`?Ct4l@XTBjwq${PPQm?2>DfQD9#?4O|mlZUJ&U%6oZU{_I*hfYMY zZ)p<^#;kdD023>t2=uu92$O$m4k;^{)z=Lfr@^E zLcg%-EF1q}A(4zJ_ zD3Ow1@tl3o0Jt#FTir*LVN!MJU&gR6wBOXUy42y~XXqOQi89YZl#~R7j1L7a1WM+g zEXOVK?-%4WQBxz=CI5ZRkRxPFX4EB2{u-6ug#n+z#v~IVTz%nRM!XH?ArpbOZhXK` z$U?#@$y$ zJN9cRI|`HU23D-lpJzf>%;Ujj5?O_+Bu69@S*tMz3w2Aetkv3q(`~Uyajn%r@mfu?p*qik z)PjCmd-3%eemVAZ<^HPZ{5F2vwId>Kam&Feq;?WNBeN*P`Sd8G7e;eMW41CiA-^NE zc8M2fgswdQSv*b5XUf5E(w3OD#jgfM|Hss~Lt`~Ku&#sG$brph3_Xd7Wx)>>;RDL! 
zqmU$_5!~E_Uf-@-_nxe;ikbTcFQRLY*Q%@XVEEw4vz5n>g;Wn8uY9#`@kenMACP7E zX%NALOSS55qbk2~(yyg|%1Pu#BWQQLmQFdZKeO=AdjIR^)km`1y_Nfq)*r8|e{GTA znpbVszGBfrA?9?zwHhFZCDn$LdM7cYq~iYr|Dtigy{pCROHoL7(J_jbKm5y0u6UVGPCtt?yCBhuPSIZfHJC2EJw=^C=d zVfU!JMb#3`xaN zKxe3_sjlZ&Rp0mf9@YBQzmHU2|K^)Jb(MZjryf^SIHD|k&>9s%L0O-t?Mu>rKs^|IUi(4QUR%m9E3Z#| zjp-^Xee9RVq3m%$9*5PcQYCLt6}+;#Qt~Ru3^6l|%m_21$c!;Fj?6J;jw5q|nF(Z0 zsw)ph)aF}Ck1B7HW8Qf5ntCv%^oa7ul|H8Q7)!k=W*U~7{2EJo@^9+CKCZk|Lc|HB zC)9(J$~&!|VY4QccSbVkQc^AKj-FBvipqOQ=~EoSdzsZ=EA_>L(@LLF8Wk@q{fg4x zQy5jhsjS#LGY=BjivMeT=BqeFl}sVgQeInjV;v`vAFR~exbFL=!7v)EyKA~;T1D|G6g*tq zym)bLar3Rl;^Oq;@?v8#Sd12vNAVacs}E85X#3$W7VWv;dNdOcnGToOq0x907x28e z7<2kX^Dv$8VcI+_OUltJd}MK!^Hg@xc&W$?`$Rm$iAh2 z%uk(DaOx7i*YSacf#GCtmF18)Ll(Ezkd?^3 zw3kgeR~S3o?IWg$yLEs)r#TysQI*Pt{9L&y*IBcK#OyHG3I`d^C@U!_zbFTbO)sg( zWfh)K77w8>J%k;l0!tB}QidohbWi$~KhWU`9M+UqBwKK{_!^RoEdu-6!u30M=E9)4 z^+GHXbEtOGn7brP;S;~YEPsp)i4;b~aP}fExl1SEa``SY7(|flrfqyWZcBA@g!iCOODgBW zJ`VR)WJ=cjU5L~f%+T6O;rsyTzlqwIt5c;ntP@swJRy8{X^%SV_~Ds+h;O`zL_@(r zvXFvxpq{}-qUfRfb$|8`qBbc1P>W1SJu6e|gZZAJBUF38WT4v7azH)9I77-YycWZ1 zP&YRd9mM`}QyU4pvYI5V%NH&X$Ige*%2hiBGrJK+8610V;u4eFb>O#-&l_M~Xt!Em zwb)KpHCh8MH=y|JaQU&Wqtp#NJK1XK*k0bUJ>78I&E&G_`R3fg&;GJE1^l-&5~vB% z&$gjFd$hNu<(uyv{Om8w#`OGpl6RNPc1m_unvQvp%(>^$+4SRN)=t{c3F#o)wi`w6 zRyJft=L?L9EuXd9flod+!)&LjF&p&7>GT7T*$jh~S-a5;UF^fQ9kkb%b@YE9^Ip`> z7M%{VZG&67%3hq#TW&M%8UW*N+l`*G7ax<*k*(7Fa_!A#r|d<>6t=q;+L|H-USIVg z3L(szdbkq!UurvqU__Zb%-V5#x!!bRBN<{m(7d!=8E0wfd@3W=nWZIrUBeFY>)M(z0X~hZcz?g`sSiE56(GzjPteGLn%$5#Oby5M%ibyA{SF^RtGg4If+UCw9CV zwws<^cd-Q&;<(+?kgU10vBTbJZXG7NPerYf(X41A~2rVt6M^COMB^|rAxp90qn+SA<-0GS%n?8 zlU7?6veI@VH%PPwtB3Y7227%S_6+n5!-AcBI7I7QR|a48RdlZ>Fs^MK*Ikm(L;rpf zIy;%sumOvl^JXKOIeQmUU)A-sTHTN8?WP;`EG4iB(ebQJ^v4hNQ@tH0;d-j_yhB!F zW|UE}Zn8%{cBC6R!tp`Dd2PCUowH2Bd?>g$PesbX5AYlUoCgu29mVu2s~f964rwD5 z!!~5g4eYy5bp&kQ5CMaD@sRkrOFW_M&Vo72@d{;j0_QTe#p&qX4hVL*Q426;_n`*% zygNT@-!b%TnXQ=19>Mdn>U>&b5O}s5%bYm2dxW}Wz3M*ou_MTnS1C7k+|qR)4w69( zut2NngK1{-!Ilj()gg2^4epGv%z7AO1ox@CP>=jpV$+fKGE>ALrZM*-2%p)Fp8bHV zW$fYFpSjJpK__ymA>VA5XuSR63mc}E$#2g1fdFF4{c!%tG&nIg^0CwVcPPdXAQyHH z#`&qy1lScEXQSN|u8H&t3@17B1Q7yJ6o%vT)rjDmP0S#jGsb}DfNamrgdBS|_{Oz8 zcWW*Q=kUY5^7pOD4?J!yXmD{h!CatMa3W6u`Vin14Nwt;lG(&a$*IsL=wY@zRVC}1 z=0Uz}8$6vPTXQWiJ(n#9Nb2)J(&?M@nZYjC$ooQ&lrWZZ(d?mkda(X=l-PmZX!-%I z%j|qbU1JSYO~N3u1u)%uBL}@KpK_q~0Du}B<^tEjxJJ`q8(q6fOswUVvplrU<4iui z*mA<{IHve6=&&{wlv{!U;kVfupK2$Hk_Ey-=C^F=3aa0-f+>G#S(+7CE zzzP5a7^a1A!gH6IY9({pjdmc63W8uJfHLw=vx#FOc%?9%(qoT*o@WlwqzT+VCw`%wFUZfNwz(8gUhKS#gK$?t%2KJ%tZCcd3ik z`6}HxaZxj!D?Vtq0!v2Gq*oUubW3wQmM6l7Adk4SMns&G9Gu;IxilF;Q4ga^s41mW zsL57R6<43Ga;%#Tht8_=5|fviyuyS5PUlr5;(+I(5Ie82g2ADq4GRc;wHno5tp-Oo z4G)|zTb;iVYtkCQU!4XdzeRknvn^MEXq1WBATk5{Z3;iXgB(M)Y!|a-K`Ir6Da|wP{R_k#U&72DfE$O=#z)a62}Q$`&BrKK_y&^+Jz^-jVHKG5)fU; zTEVFj(F1sVZCQwSk_x0l3P6Y;Cj!Q;adaGFDJLnKxNuSVBWY0@{rxj6$?3TiFbKa- z!_#8O?*@8Z8jOkrOGikuO2BsHUV`9DWH1YHj@tAgj?id~yjpLse;ijP=ZRz-RYMQgAi$W0K1Xjgw-SRc!Hrj>nX!tA#)qR#G zd^R{$ev`=AB`RH-XTPeTwiD)oA+=AOI!sz!_E!u!hp3VDtvIZ&>E!%IgdSBHZ00aY z5)L7k@GF-!bI1@(1Y)rr*$tCi#Slr%4HLEtO$bnOnKv&cfQX-wH%gf)6X~GoR0|`2 zCA)xie#i#+9^u8`ASW@x5&aYLa)+uQ2PdpLv4iJID3^5-HLwEwBTIwjVbXw8gP#aH zf(L=sQVi}!@w6!ho~OkS5EKugSOQ?&3G7Mh47d>YTqQA;WP?&WNszk_+eJ#|<(dT{ z0I|w4w?8hUDh~q0+@ZU$xPiQ|tyd7>mtgTzy4K*SG(IH?H23l^+eUp7&^kW2!;KcU z7OWBjR3NQJUKL1#Vb~CJwgyL)d2UpKxaxOsMkWN1ME{H_!Zt!?Kl6Pt+`yW_~2_FXbZEOm0scYc6J*|c>xT7%=|{FwDuSxUsMb@jyK(Q&#% zA^kgi<2N{NrD9bI{n$aK61Ue{vF5PWT9xC#P66jU*#>GY)TE`sC8Yd$%fTx z>!H_f@{y~n)x5A?tC@}9mI$B7O@MPAk4A@bTCtX#cbFqN9X^rVQQYJ^G!$>)8&ie% 
zzua$`zp{1mQhB0$rq0(s_+>i+Sp2U@cJZ&Ec8hAaq;`kYZok?cP`iU_x2$$6YPV1A zj;P&HwR=qM9#^|3)b51ZJ*jpl)$Wwq#Vag$)x}c3tJLqI3#zQH(a<*^4YQa5F~x8; z=6Khz-D94jo5V3O@BctvPTSaX#=*NuMH%Om+B*d-p^(mtnM#50$Myr)KxRsU309vrkvHJ6IyDolA6_`4>9eD-f7c@VtHpxu;3obia3h2-Ep;t z7q0Twdswlt_@QlZiSjmvqFgvgi$`Kr2-uid3dOq#L!26*hS)t!^#nSL zA=WS%Xm8c&`LXXB5~>1%6x{5QEbL zsvoV4QaXX!&fg?VGMHtBKDa3+H)gRlTU4O1`tU2TMqbNd?KYU;P*@wz*qg8hArZSP zg|*=v*6>Q>eprJ{bzlu_56jKqzzZ;Ln3X8yk0m1>18mI?XAGRmn zxD5UR%QR)^g)(9RND;6>m*9^uBsv&KqiG8d3B^06ctVhl#qfPl+>l}}9H7Z!00@fj z=93hNyD3<4s>!aY@Kze&J_JHP`4<(u1nU>jftR}ky`WsCypb$Y2nZKUpAgHrAG4MS zHNYw%99ilrE(}XHi^V%%tOwq+3Nu*9r<9h3(!4ZXElUOZOj|%CSH)7|8b?Sc=w1}3 z!J@jRNgJ8%Wi__117&$~1V{FF`7dO)x}cl`<#-Tb*D| zq-mejQKa@uod}$WV@J3K(XN(W3jV*!)(uSjCzLG)AQ<*KR0O#(OJ%Q#&NWkyAav9A!X!83`|n^7BP$#5&1twW-nStC7(@AFvmX$9bO# zjdSM|61kNy!DxxuJf8)={q$wa3=3f7qjO*61g@A^*Hv07@jWE{)-WPu6L4RK7;)rp mVGM#ZW|bfaoxh>Ni0Ut#Ee>1#g>t!kdZaM<*Njmf_V?dj3=-%7 diff --git a/simplejson/scanner.pyc b/simplejson/scanner.pyc deleted file mode 100644 index 30d94445f0a0c941ee46b6c4fa3bd255e662f6ef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2340 zcmb_dUvC>l5T8B&cj6S1v}wr$X-SKd%5~uhP(>h$k&IGTvRgn&D(iB$PA@s%o$fXz z5y=AtVOzZO)Mvf`?|cM43}2x9X3kDsMZDG4&dtuw@3*rvb9;aNTPd%;dewAj{43%4 z6-F|IaEW#x6}c82DcVtVx+v2O9a-dOXeUeR``{L3b&d|p6jnX8C{7O5ZHEFAz? zAg#zNlA9ByB(hAKY@MOa3jk)x&C{>gutGXZ5r}n#b~3zmr&{2M79hUuJZY_%@JI(M ziDL(Wj?3O_{909oRWl3Gw~uspyx7K^k~N5GZKJyJ#ly4RPimh(-*ea3)~b6C_T2kx z8`WLic)nY^|9nHH4ioX1!N_1FzeAi6c|@aBQ8X%x#iCJ>OoqHHjk03N(I_WMo<=26 z3N*4rDbh%ZQle2_lroJnG|16F(ZHfXi3VjFWN83no(4pN0u74Po8grIhTRJ^EFc*c z;%PZ7ix_-l&P?(LET?l!e5UBuxkXYKLsNw@ihfaPVa_aOJ+vrXCN-4f0ET2Qq42{D zU1X^fye7qd8Sz_%UvW&&em#p)*I|i5OL5o+BD{!IPQGzefF z6A7CA^0ai@Er@-d6dG`B1h^A~DXjQEu+jvEl1#%sOJU`!uo>QM_7a9@5dw^|o5F#m zm@p#hbC9APku$HIr9^j}vIAKFOyA@i3eWG3{ zIxkRwH)e>o$Wl5#EAS>B(n%yi{GG-=Cow+6Js|1g*nk3Pu^1&KZ#*i zWPmpauP=+>*!RU|WNPALEz=&74Hp(Y+fOta8&f7~dHkB9=1}c)nG}o)>uL zR9tHw-){+v+GIg47gH8jSD!TEIE+mN(~b$FrqQu&yfBbpT4A6?dCF07DBnmZd1wc5 zcpN1Xg$~@R?9cYZ#9nY9cF#SLkOF;ToELU1A@vPkZeC#YfsTc|7u!zCa}voj)=8Cb zBLVBc30-F7Lqv9*=q|v9*V9?g4{c*6TRYQBb{yNM<4Y0|bc5smJ~m}+xPPb}(|r+! zM`rGl%L#+T*r4ZICZ$guC0}yOcEiBYQ|sw@tMI2}1ET&c(Q#5g@?y{T!9(P#W zX0gKJ9*esW>9>Z5Q%*a@zrvmZWcT(e3tKW%O|TMg;kttU^v-rjDngP~o6NYe`? n)!s$2k|Nk1^+WgA*I)9HlN%6uHH!vYlm;tVdFUTlrBD9@kb}!4 From 0e28d86a7bbd3a20f8e83a571d3dfb87c5ea3383 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 5 May 2011 20:47:39 -0500 Subject: [PATCH 136/482] Fix for zip files, status in epub. --- fanficdownloader/epubmerge.py | 8 ++++++++ fanficdownloader/writers/base_writer.py | 4 ++++ fanficdownloader/writers/writer_epub.py | 11 ++++++++++- index.html | 16 +++++----------- utils/remover.py | 2 +- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/fanficdownloader/epubmerge.py b/fanficdownloader/epubmerge.py index 6d35be6a..88b0bcb6 100644 --- a/fanficdownloader/epubmerge.py +++ b/fanficdownloader/epubmerge.py @@ -81,6 +81,10 @@ def main(): outputepub = ZipFile(outputopt, "w", compression=ZIP_STORED) outputepub.debug = 3 outputepub.writestr("mimetype", "application/epub+zip") + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 outputepub.close() ## Re-open file for content. @@ -277,6 +281,10 @@ def main(): outputepub.writestr("content.opf",contentdom.toxml('utf-8')) outputepub.writestr("toc.ncx",tocncxdom.toxml('utf-8')) + # declares all the files created by Windows. 
otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 outputepub.close() ## Utility method for creating new tags. diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index f8866d55..7e8d13c3 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -159,6 +159,10 @@ class BaseStoryWriter(Configurable): self.writeStoryImpl(out) zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) zipout.writestr(filename,out.getvalue()) + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in zipout.filelist: + zf.create_system = 0 zipout.close() out.close() else: diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index a7d149c6..150ab7ab 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -153,11 +153,13 @@ h6 { text-align: center; } ## ZipFile can't change compression type file-by-file, so we ## have to close and re-open outputepub = ZipFile(zipio, 'w', compression=ZIP_STORED) + outputepub.debug=3 outputepub.writestr('mimetype','application/epub+zip') outputepub.close() ## Re-open file for content. outputepub = ZipFile(zipio, 'a', compression=ZIP_DEFLATED) + outputepub.debug=3 ## Create META-INF/container.xml file. The only thing it does is ## point to content.opf @@ -238,6 +240,8 @@ h6 { text-align: center; } metadata.appendChild(newTag(contentdom,"dc:description",text= self.getMetadata('description'))) + metadata.appendChild(newTag(contentdom,"dc:subject",text= + self.getMetadata('status'))) # listables all go into dc:suject tags, but only if they are configured. for (name,lst) in self.story.getLists().iteritems(): if name in self.getConfigList("include_subject_tags"): @@ -387,7 +391,12 @@ h6 { text-align: center; } fullhtml = fullhtml.replace('

    ','

    \n').replace('
    ','
    \n') outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8')) del fullhtml - + + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + logging.debug("outputepub create_system") + for zf in outputepub.filelist: + zf.create_system = 0 outputepub.close() out.write(zipio.getvalue()) zipio.close() diff --git a/index.html b/index.html index 51d6cd57..b3ec8843 100644 --- a/index.html +++ b/index.html @@ -35,23 +35,17 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier.

    -

For Amazon Kindle use Mobi output (see notice below), for Sony Reader, Nook and iPad use ePub

    Or see your personal list of previously downloaded fanfics.

    -

    Experimental New Feature

    +

    Experimental New Version!

-      If you select EPub format, when it's done you will also be given a 'Convert' link.
+      This version is a new re-org/re-write of the code

-      That link will take you to convertfiles.com where you can
-      directly convert your new story to FictionBook (fb2), Mobipocket (mobi), MS Reader (lit) or Adobe Portable
-      Document Format(pdf).
-      There's also a 'Convert' link for EPubs on your recent downloads
-      page.  We'd really like to hear from users about this in our Google Group.
+      So far, only a few sites are supported: fanfiction.net, twilighted.net and whofic.com.

-      We'd especially like Kindle and other Mobi users to try it.  The convertfiles.com Mobi file
-      appears to be more correct than our Mobi output.
+      Mobi support (for Kindle) is only via EPub conversion in this version.

@@ -66,7 +60,7 @@
       EPub
       HTML
       Plain Text
-

    For Mobi (Kindle) select EPub and Convert it.

    +

    For Mobi (Kindle) select EPub and use the Convert link when it's finished.
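
Editor's aside, not part of the patch: the zip handling this commit touches in three places is easiest to see in isolation. A minimal sketch, assuming Python 2.x zipfile semantics; the file and entry names here are hypothetical, and this illustrates the technique rather than reproducing the patch's exact code:

    from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED

    # The epub container wants "mimetype" first and uncompressed, and ZipFile
    # can't mix compression types in one session, so write it, close, and
    # re-open in append mode for the compressed content.
    outputepub = ZipFile("story.epub", "w", compression=ZIP_STORED)
    outputepub.writestr("mimetype", "application/epub+zip")
    outputepub.close()

    outputepub = ZipFile("story.epub", "a", compression=ZIP_DEFLATED)
    outputepub.writestr("OEBPS/content.opf", "<package/>")

    # Entries added via writestr() carry no useful Unix permission bits, so
    # some unzip tools extract them as mode 000.  Declaring each entry as
    # Windows-created (create_system == 0) makes those tools fall back to
    # default permissions -- the workaround this patch applies everywhere
    # an archive is written.
    for zf in outputepub.filelist:
        zf.create_system = 0
    outputepub.close()
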

diff --git a/utils/remover.py b/utils/remover.py
index d9aa8249..d81fe85f 100644
--- a/utils/remover.py
+++ b/utils/remover.py
@@ -21,7 +21,7 @@ class Remover(webapp.RequestHandler):
         logging.debug("Starting r3m0v3r")
         user = users.get_current_user()
         logging.debug("Working as user %s" % user)
-        theDate = datetime.date.today() - datetime.timedelta(days=5)
+        theDate = datetime.date.today() - datetime.timedelta(days=7)
         logging.debug("Will delete stuff older than %s" % theDate)

         fics = DownloadMeta.all()

From 3e1f0dd53bcde6f61a261f3ef4c53805dd4dd4f6 Mon Sep 17 00:00:00 2001
From: sigizmund
Date: Fri, 6 May 2011 11:24:27 +0100
Subject: [PATCH 137/482] Fixing screwed Google Analytics tracking code. I
 don't know how, but it nearly disappeared and our impressions went down from
 250+ a day to around 30.

---
 index.html  | 429 +++++++++++++++++++++++++++-------------------------
 recent.html | 158 +++++++++----------
 status.html |  19 ++-
 3 files changed, 318 insertions(+), 288 deletions(-)

diff --git a/index.html b/index.html
index 499d3be8..25abb32f 100644
--- a/index.html
+++ b/index.html
@@ -1,219 +1,232 @@
-
-
-  Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza
-
-
-
-
    -

-      FanFiction Downloader
-

    + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + - -
    - - {{yourfile}} - + var _gaq = _gaq || []; + _gaq.push(['_setAccount', 'UA-12136939-1']); + _gaq.push(['_trackPageview']); - {% if authorized %} -
    -
    -
    -

      Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites
-      much easier.

    -

For Amazon Kindle use Mobi output (see notice below), for Sony Reader, Nook and iPad use ePub

    -

    Or see your personal list of previously downloaded fanfics.

    -
    -

    Experimental New Feature

    -

-      If you select EPub format, when it's done you will also be given a 'Convert' link.
-

    -

-      That link will take you to convertfiles.com where you can
-      directly convert your new story to FictionBook (fb2), Mobipocket (mobi), MS Reader (lit) or Adobe Portable
-      Document Format(pdf).
-      There's also a 'Convert' link for EPubs on your recent downloads
-      page.  We'd really like to hear from users about this in our Google Group.
-

    -

-      We'd especially like Kindle and other Mobi users to try it.  The convertfiles.com Mobi file
-      appears to be more correct than our Mobi output.
-

+    (function() {
+      var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+      ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+      var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+    })();
-
    - {{ error_message }} -
    - -
    - -
    -
    Ebook format
    -
    - EPub - HTML - Plain Text - Mobi (Kindle) -
    -
    - -
    -

    Login and Password

    -
-      If the story requires a login and
-      password to download, you may need
-      to provide your credentials to
-      download it, otherwise just leave
-      it empty.  Currently only needed
-      by twilighted.net and twiwrite.net.
    -
    -
    Login
    -
    -
    - -
    -
    Password
    -
    -
    -
    - -
    - -
    -
    - {% else %} -
    -
    -

-      This is a fan fiction downloader, which makes reading stories from various websites much easier.  Before you
-      can start downloading fanfics, you need to log in so the downloader can remember your fanfics and store them.
-

    -

    Login using Google account

    -
    -
    - {% endif %} - -
    -
    -
    fictionalley.org -
    Use the URL of the story's chapter list, such as -
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for - fictionalley.org one-shots, such as -
    http://www.fictionalley.org/authors/drt/JOTP01a.html. -
    fanfiction.net -
    Use the URL of any story chapter, with or without story title such as -
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or -
    http://www.fanfiction.net/s/2345466/3/. -
    fictionpress.com -
    Use the URL of any story chapter, such as -
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or -
    http://www.fictionpress.com/s/2847338/6/. -
    twilighted.net -
    Use the URL of the start of the story, such as -
    http://twilighted.net/viewstory.php?sid=8422. -
    twiwrite.net -
    Use the URL of the start of the story, such as -
    http://twiwrite.net/viewstory.php?sid=427. -
    ficwad.com -
    Use the URL of any story chapter, such as -
    http://www.ficwad.com/story/75246. -
    harrypotterfanfiction.com + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    +

      Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites
+      much easier.

    +

For Amazon Kindle use Mobi output (see notice below), for Sony Reader, Nook and iPad use ePub

    +

    Or see your personal list of previously downloaded fanfics.

    +
    +

    Experimental New Feature

    +

+      If you select EPub format, when it's done you will also be given a 'Convert' link.
+

    +

+      That link will take you to convertfiles.com where you can
+      directly convert your new story to FictionBook (fb2), Mobipocket (mobi), MS Reader (lit) or Adobe Portable
+      Document Format(pdf).
+      There's also a 'Convert' link for EPubs on your recent downloads
+      page.  We'd really like to hear from users about this in our Google Group.
+

    +

+      We'd especially like Kindle and other Mobi users to try it.  The convertfiles.com Mobi file
+      appears to be more correct than our Mobi output.
+

    + +
    + {{ error_message }} +
    + +
    + +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text + Mobi (Kindle) +
    +
    + +
    +

    Login and Password

    +
+      If the story requires a login and
+      password to download, you may need
+      to provide your credentials to
+      download it, otherwise just leave
+      it empty.  Currently only needed
+      by twilighted.net and twiwrite.net.
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    + +
    + +
    +
    + {% else %} +
    +
    +

+      This is a fan fiction downloader, which makes reading stories from various websites much easier.  Before you
+      can start downloading fanfics, you need to log in so the downloader can remember your fanfics and store them.
+

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    +
    +
    fictionalley.org +
    Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for + fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    fanfiction.net +
    Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/2345466/3/. +
    fictionpress.com +
    Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    twilighted.net +
    Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    twiwrite.net +
    Use the URL of the start of the story, such as +
    http://twiwrite.net/viewstory.php?sid=427. +
    ficwad.com +
    Use the URL of any story chapter, such as +
    http://www.ficwad.com/story/75246. +
    harrypotterfanfiction.com
    Use the URL of the story's chapter list, such as
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. -
    potionsandsnitches.net -
    Use the URL of the story's chapter list, such as -
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. -
    mediaminer.org -
    Use the URL of the story's chapter list, such as -
    http://www.mediaminer.org/fanfic/view_st.php/166653. - Or the story URL for one-shots, such as -
    http://www.mediaminer.org/fanfic/view_st.php/167618. -
    adastrafanfic.com -
    Use the URL of the story's chapter list, such as -
    http://www.adastrafanfic.com/viewstory.php?sid=854. -
    whofic.com -
    Use the URL of the story's chapter list, such as -
    http://www.whofic.com/viewstory.php?sid=16334. -
    +
    potionsandsnitches.net +
    Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    mediaminer.org +
    Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. + Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
    adastrafanfic.com +
    Use the URL of the story's chapter list, such as +
    http://www.adastrafanfic.com/viewstory.php?sid=854. +
    whofic.com +
    Use the URL of the story's chapter list, such as +
    http://www.whofic.com/viewstory.php?sid=16334. +
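
Editor's aside, not part of the patch: the URL examples listed above correspond to fairly strict per-site patterns, which the downloader checks before fetching anything. A sketch of how such validation can be expressed; the regexes here are illustrative assumptions, not the adapters' actual patterns:

    import re

    SITE_PATTERNS = {
        "www.fanfiction.net": r"^http://www\.fanfiction\.net/s/\d+(/\d+)?(/.*)?$",
        "www.whofic.com": r"^http://www\.whofic\.com/viewstory\.php\?sid=\d+$",
        "twilighted.net": r"^http://twilighted\.net/viewstory\.php\?sid=\d+$",
    }

    def acceptable_url(url):
        """True if the story URL matches any known site pattern."""
        return any(re.match(pat, url) for pat in SITE_PATTERNS.values())

    print(acceptable_url("http://www.fanfiction.net/s/2345466/3/"))  # True
    print(acceptable_url("http://example.com/story/1"))              # False
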
    - - A few additional things to know, which will make your life substantially easier: -
-
-      1. First thing to know: I do not use your login and password. In fact, all I know about it is your ID – the password
-         is verified by Google and is absolutely, totally unknown to anyone but you.
-      2. Small post written by me — how to read fiction in Stanza or any other ebook reader.
-      3. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
-      4. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep
-         Google happy about the app not going over the storage limit).
-      5. If you see some funny characters in a downloaded Plain Text file, make sure you choose the text file encoding UTF-8
-         and not something else.
-      6. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com or, even better,
-         write an email to our Google Group. I also encourage you to join it so you will find out about the latest updates
-         and fixes as soon as possible.
-      Otherwise, just have fun, and if you want to say thank you — use the contacts above.
-
    -
    - Powered by Google App Engine -

    - FanfictionLoader is a web front-end to fanficdownloader
    - Copyright © Roman Kirillov -
    + + A few additional things to know, which will make your life substantially easier: +
+
+      1. First thing to know: I do not use your login and password. In fact, all I know about it is your ID – the password
+         is verified by Google and is absolutely, totally unknown to anyone but you.
+      2. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+      3. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
+      4. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep
+         Google happy about the app not going over the storage limit).
+      5. If you see some funny characters in a downloaded Plain Text file, make sure you choose the text file encoding UTF-8
+         and not something else.
+      6. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com or, even better,
+         write an email to our Google Group. I also encourage you to join it so you will find out about the latest updates
+         and fixes as soon as possible.
+      Otherwise, just have fun, and if you want to say thank you — use the contacts above.
+
+
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    -
    - - + -
    - - - - - + + + + + + + diff --git a/recent.html b/recent.html index d03a621f..2db3ac92 100644 --- a/recent.html +++ b/recent.html @@ -1,80 +1,84 @@ - - - Fanfiction Downloader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) - - - -
    -

    - FanFiction Downloader -

    - - - - - {{yourfile}} - - -
    -
    - Hi, {{ nickname }}! These are the fanfics you've recently requested. -
    -
    - -
    - {% for fic in fics %} -

    - {% if fic.completed %} - Download {{ fic.title }} - by {{ fic.author }} ({{ fic.format }})
    - {% if fic.escaped_url %} - Convert {{ fic.title }} to other formats
    - {% endif %} - {% endif %} - {% if fic.failure %} -

    {{ fic.failure }}
    - {% endif %} - {% if not fic.completed and not fic.failure %} - Request Processing...
    - {% endif %} - {{ fic.url }} - -

    - {% endfor %} -
    - - - - -
    - - - + + + Fanfiction Downloader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) + + + + +
    +

    + FanFiction Downloader +

    + + + + + {{yourfile}} + + +
    +
    + Hi, {{ nickname }}! These are the fanfics you've recently requested. +
    +
    + +
    + {% for fic in fics %} +

    + {% if fic.completed %} + Download {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})
    + {% if fic.escaped_url %} + Convert {{ fic.title }} to other formats
    + {% endif %} + {% endif %} + {% if fic.failure %} +

    {{ fic.failure }}
    + {% endif %} + {% if not fic.completed and not fic.failure %} + Request Processing...
    + {% endif %} + {{ fic.url }} + +

    + {% endfor %} +
    + + + + +
    + diff --git a/status.html b/status.html index cb70cb0e..b8c22a57 100644 --- a/status.html +++ b/status.html @@ -8,6 +8,19 @@ {% if not fic.completed and not fic.failure %} {% endif %} +
    @@ -37,9 +50,9 @@

    Your fic has finished processing and you can download it now:

    Download {{ fic.title }} by {{ fic.author }} ({{ fic.format }})

    - {% if escaped_url %} -

    Convert {{ fic.title }} to other formats

-      {% endif %}
+      {% if escaped_url %}
+

    Convert {{ fic.title }} to other formats

+      {% endif %}
    {% else %}
      {% if fic.failure %}
        Your fic failed to process.  Please check the URL and the error message below.
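
Editor's aside, not part of the patch: the status page above simply re-renders until the download record settles into one of three states. A minimal sketch of the handler-side contract; the names are illustrative, modeled on the DownloadMeta fields used elsewhere in this series:

    def status_view(fic):
        """Map a DownloadMeta-like record to what status.html should show."""
        if fic.completed:
            return 'done'        # offer the Download (and Convert) links
        elif fic.failure:
            return 'failed'      # show the stored failure message
        else:
            return 'processing'  # page refreshes and polls /status?id=... again
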
From 605c4c40fc7f19b4de4612505d1cfb6cb83cfeb6 Mon Sep 17 00:00:00 2001
From: sigizmund
Date: Fri, 6 May 2011 12:00:50 +0100
Subject: [PATCH 138/482] Mis-aligned ad block

---
 index.html | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/index.html b/index.html
index 25abb32f..77335146 100644
--- a/index.html
+++ b/index.html
@@ -215,18 +215,6 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
-
-
-

From 9515ff00be97c1b6d46879d07ccd6bf31c0121e4 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Fri, 6 May 2011 12:42:20 -0500
Subject: [PATCH 139/482] Tweak output a bit, allow any 'entry' as epub
 subject.

---
 defaults.ini                            |  8 ++++----
 fanficdownloader/epubmerge.py           |  4 ----
 fanficdownloader/writers/writer_epub.py | 10 ++++++----
 fanficdownloader/writers/writer_html.py |  2 +-
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/defaults.ini b/defaults.ini
index f0029adf..eea3f4ae 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -28,7 +28,7 @@
 # formatext

 ## items to include in title page
-titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyId,authorId,extratags,description
+titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description

 ## include title page as first page.
 include_titlepage: true
@@ -73,10 +73,10 @@ windows_eol: true
 ## epub is already a zip file.
 zip_output: false

-# possible subject tags: extratags, genre, category, warnings, lastupdate
 # lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
-include_subject_tags: extratags, genre, category, lastupdate
-include_tocpage: false
+include_subject_tags: extratags, genre, category, lastupdate, status
+#include_tocpage: false

 # epub->mobi conversions typically don't like tables.
 titlepage_use_table: true
diff --git a/fanficdownloader/epubmerge.py b/fanficdownloader/epubmerge.py
index 88b0bcb6..9e518060 100644
--- a/fanficdownloader/epubmerge.py
+++ b/fanficdownloader/epubmerge.py
@@ -81,10 +81,6 @@ def main():
     outputepub = ZipFile(outputopt, "w", compression=ZIP_STORED)
     outputepub.debug = 3
     outputepub.writestr("mimetype", "application/epub+zip")
-    # declares all the files created by Windows.  otherwise, when
-    # it runs in appengine, windows unzips the files as 000 perms.
-    for zf in outputepub.filelist:
-        zf.create_system = 0
     outputepub.close()

     ## Re-open file for content.
diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index 150ab7ab..52f18dcb 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -80,7 +80,7 @@ h6 { text-align: center; }

    ${title} by ${author}

    +

    ${title} by ${author}

    ''') @@ -240,8 +240,11 @@ h6 { text-align: center; } metadata.appendChild(newTag(contentdom,"dc:description",text= self.getMetadata('description'))) - metadata.appendChild(newTag(contentdom,"dc:subject",text= - self.getMetadata('status'))) + for entry in self.titleLabels.keys(): + if entry in self.getConfigList("include_subject_tags") and \ + self.story.getMetadata(entry): + metadata.appendChild(newTag(contentdom,"dc:subject",text= + self.getMetadata(entry))) # listables all go into dc:suject tags, but only if they are configured. for (name,lst) in self.story.getLists().iteritems(): if name in self.getConfigList("include_subject_tags"): @@ -394,7 +397,6 @@ h6 { text-align: center; } # declares all the files created by Windows. otherwise, when # it runs in appengine, windows unzips the files as 000 perms. - logging.debug("outputepub create_system") for zf in outputepub.filelist: zf.create_system = 0 outputepub.close() diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 0c040084..378fb5b7 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -25,7 +25,7 @@ class HTMLWriter(BaseStoryWriter): ${title} by ${author} -

    ${title} by ${author}

    +

    ${title} by ${author}

    ''') self.HTML_TITLE_PAGE_START = string.Template(''' From d3b1ddcda99645b2e1a9a254447d272329c63ab5 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 6 May 2011 19:59:04 -0500 Subject: [PATCH 140/482] Added tag fanficdownloader-3.0.2 for changeset 564ada569d46 From d91f2e74fd207c3a9b081ac758e6a7bf840d531c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 6 May 2011 20:33:54 -0500 Subject: [PATCH 141/482] Only ask for Login/Password when required. Save and use user configuration. --- defaults.ini | 222 +++--- editconfig.html | 83 +++ fanficdownloader/adapters/adapter_test1.py | 8 +- .../adapters/adapter_twilightednet.py | 5 +- fanficdownloader/adapters/base_adapter.py | 1 + ffstorage.py | 4 + index.html | 35 +- login.html | 96 +++ main.py | 696 ++++++++++-------- newdownload.py | 2 +- 10 files changed, 687 insertions(+), 465 deletions(-) create mode 100644 editconfig.html create mode 100644 login.html diff --git a/defaults.ini b/defaults.ini index eea3f4ae..ec3f8c41 100644 --- a/defaults.ini +++ b/defaults.ini @@ -1,111 +1,111 @@ -[defaults] - -## [defaults] section applies to all formats and sites but may be -## overridden. - -# All available titlepage_entries: -# category -# genre -# status -# datePublished -# dateUpdated -# dateCreated -# rating -# warnings -# numChapters -# numWords -# site -# siteabbrev -# author -# authorId -# authorURL -# title -# storyId -# storyUrl -# extratags -# description -# formatname -# formatext - -## items to include in title page -titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description - -## include title page as first page. -include_titlepage: true - -## include TOC page immediately after title page. -include_tocpage: true - -## python string Template, string with ${title}, ${author} etc, same as titlepage_entries -## Can include directories. ${formatext} will be added if not in name somewhere. -output_filename: ${title}-${siteabbrev}_${storyId}${formatext} -## Make directories as needed. -make_directories: true - -## put output (with output_filename) in a zip file zip_filename. -zip_output: false -## Can include directories. .zip will be added if not in name somewhere -zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip - -## try to make the output file name 'safe'--remove invalid filename chars. -## applies to both output_filename & zip_filename -safe_filename: true - -## extra tags (comma separated) to include, primarily for epub. -extratags: FanFiction - -## number of seconds to sleep between calls to the story site. -#slow_down_sleep_time:0.5 - -## Each output format has a section that overrides [defaults] - -[html] - -[txt] -## Add URLs since there aren't links. -titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,storyUrl, author URL, description - -# use \r\n for line endings, the windows convention. txt output only. -windows_eol: true - -[epub] - -## epub is already a zip file. -zip_output: false - -# entries tags to make epub subject tags -# lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" -include_subject_tags: extratags, genre, category, lastupdate, status -#include_tocpage: false - -# epub->mobi conversions typically don't like tables. -titlepage_use_table: true - -## When using tables, make these span both columns. 
-wide_titlepage_entries: description, storyUrl, author URL - - -## Each site has a section that overrides [defaults] *and* the format section -[test1.com] -#titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags -extratags: FanFiction,Testing - -## If necessary, you can define [:] sections to customize -## the formats differently for the same site. Overrides defaults, format and site. -[test1.com:txt] -extratags: FanFiction,Testing,Text - -[test1.com:html] -extratags: FanFiction,Testing,HTML - -[www.twilighted.net] -## Some sites require login (or login for some rated stories) -## The program can prompt you, or you can save it in config. -## This should go in your personal.ini, not defaults.ini. -#username:YourName -#password:yourpassword - -[www.whofic.com] - -[www.fanfiction.net] - +[defaults] + +## [defaults] section applies to all formats and sites but may be +## overridden. + +# All available titlepage_entries: +# category +# genre +# status +# datePublished +# dateUpdated +# dateCreated +# rating +# warnings +# numChapters +# numWords +# site +# siteabbrev +# author +# authorId +# authorURL +# title +# storyId +# storyUrl +# extratags +# description +# formatname +# formatext + +## items to include in title page +titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description + +## include title page as first page. +include_titlepage: true + +## include TOC page immediately after title page. +include_tocpage: true + +## python string Template, string with ${title}, ${author} etc, same as titlepage_entries +## Can include directories. ${formatext} will be added if not in name somewhere. +output_filename: ${title}-${siteabbrev}_${storyId}${formatext} +## Make directories as needed. +make_directories: true + +## put output (with output_filename) in a zip file zip_filename. +zip_output: false +## Can include directories. .zip will be added if not in name somewhere +zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip + +## try to make the output file name 'safe'--remove invalid filename chars. +## applies to both output_filename & zip_filename +safe_filename: true + +## extra tags (comma separated) to include, primarily for epub. +extratags: FanFiction + +## number of seconds to sleep between calls to the story site. +#slow_down_sleep_time:0.5 + +## Each output format has a section that overrides [defaults] + +[html] + +[txt] +## Add URLs since there aren't links. +titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,storyUrl, author URL, description + +# use \r\n for line endings, the windows convention. txt output only. +windows_eol: true + +[epub] + +## epub is already a zip file. +zip_output: false + +# entries tags to make epub subject tags +# lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" +include_subject_tags: extratags, genre, category, lastupdate, status +#include_tocpage: false + +# epub->mobi conversions typically don't like tables. +titlepage_use_table: true + +## When using tables, make these span both columns. 
+wide_titlepage_entries: description, storyUrl, author URL + + +## Each site has a section that overrides [defaults] *and* the format section +[test1.com] +#titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags +extratags: FanFiction,Testing + +## If necessary, you can define [:] sections to customize +## the formats differently for the same site. Overrides defaults, format and site. +[test1.com:txt] +extratags: FanFiction,Testing,Text + +[test1.com:html] +extratags: FanFiction,Testing,HTML + +[www.twilighted.net] +## Some sites require login (or login for some rated stories) +## The program can prompt you, or you can save it in config. +## This should go in your personal.ini, not defaults.ini. +#username:YourName +#password:yourpassword + +[www.whofic.com] + +[www.fanfiction.net] + diff --git a/editconfig.html b/editconfig.html new file mode 100644 index 00000000..9b59fd63 --- /dev/null +++ b/editconfig.html @@ -0,0 +1,83 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + +
    + +
    +

    Edit Config

    +
+      Editing configuration for {{ nickname }}.
+      {% if default %} Default values are shown. {% else %} Empty this box and Save to revert to the default values. {% endif %}
+
    +
    + +
    +
    + +
    + +
    + + +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    +
+
+
diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index fdf9cb87..3f4af12e 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -15,6 +15,7 @@ class TestSiteAdapter(BaseSiteAdapter):
         self.crazystring = u" crazy tests:[bare amp(&) quote(') amp(&) gt(>) lt(<) ATnT(AT&T) pound(£)]"
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        self.username=''

     @staticmethod
     def getSiteDomain():
@@ -31,8 +32,11 @@ class TestSiteAdapter(BaseSiteAdapter):
         if self.story.getMetadata('storyId') == '666':
             raise exceptions.StoryDoesNotExist(self.url)

-        if self.story.getMetadata('storyId') == '668':
-            raise exceptions.FailedToLogin(self.url,"FakeUser")
+        if self.getConfig("username"):
+            self.username = self.getConfig("username")
+
+        if self.story.getMetadata('storyId') == '668' and self.username != "Me" :
+            raise exceptions.FailedToLogin(self.url,self.username)

         self.story.setMetadata(u'title',"Test Story Title "+self.crazystring)
         self.story.setMetadata('storyUrl',self.url)
diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py
index f3d64064..751fb9c3 100644
--- a/fanficdownloader/adapters/adapter_twilightednet.py
+++ b/fanficdownloader/adapters/adapter_twilightednet.py
@@ -72,7 +72,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):

         d = self._fetchUrl(loginUrl, urlvals)

-        if self.needToLoginCheck(d) :
+        if "Member Account" not in d : #Member Account
             logging.info("Failed to login to URL %s as %s" % (loginUrl, data['penname']))
             raise exceptions.FailedToLogin(url,data['penname'])

@@ -98,6 +98,9 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
             self.performLogin(url)

         data = self._fetchUrl(url)
+        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
         # use BeautifulSoup HTML parser to make everything easier to find.
         soup = bs.BeautifulSoup(data)

diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index d4233d08..67c0033a 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -82,6 +82,7 @@ class BaseSiteAdapter(Configurable):
     ## URL pattern validation is done *after* picking an adaptor based
     ## on domain instead of *as* the adaptor selector so we can offer
     ## the user example(s) for that particular site.
+    ## Override validateURL(self) instead if you need more control.
     def getSiteURLPattern(self):
         "Used to validate URL.  Should be override in each adapter class."
         return '^http://'+re.escape(self.getSiteDomain())
diff --git a/ffstorage.py b/ffstorage.py
index bb17d8bb..dae09352 100644
--- a/ffstorage.py
+++ b/ffstorage.py
@@ -17,3 +17,7 @@ class DownloadData(db.Model):
                                    collection_name='data_chunks')
     blob = db.BlobProperty()
     index = db.IntegerProperty()
+
+class UserConfig(db.Model):
+    user = db.UserProperty()
+    config = db.TextProperty()
diff --git a/index.html b/index.html
index 5af8067c..940a5cad 100644
--- a/index.html
+++ b/index.html
@@ -53,11 +53,14 @@

    Experimental New Version!

-      This version is a new re-org/re-write of the code
+      This version is a new re-org/re-write of the code.

    So far, only a few sites are supported: fanfiction.net, twilighted.net and whofic.com.

    +

+      Login/Password is only asked for when required now.
+

    Mobi support (for Kindle) is only via EPub conversion in this version.

    @@ -65,10 +68,11 @@
    {{ error_message }}
    -
    +
    URL:
    +
    Ebook format
    EPub @@ -76,30 +80,13 @@ Plain Text

    For Mobi (Kindle) select EPub and use the Convert link when it's finished.

    -
    - -
    -

    Login and Password

    -
-      If the story requires a login and password to download,
-      you may need to provide your credentials to download it,
-      otherwise just leave it empty.  Currently only needed by
-      twilighted.net and twiwrite.net.
    -
    -
    Login
    -
    -
    - -
    -
    Password
    -
    +
    +
    - -
    - -
    +
+      Customize your User Configuration.
+
    {% else %}
    diff --git a/login.html b/login.html new file mode 100644 index 00000000..e54141cd --- /dev/null +++ b/login.html @@ -0,0 +1,96 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {% if fic.failure %} +
    + {{ fic.failure }} +
    + {% endif %} +
    +
    +

    Login and Password

    +
+      {{ site }} requires a Login/Password for this story.
+      You need to provide your Login/Password for {{ site }}
+      to download it.
    + + +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    + +
    + +
    + + +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + +
    +
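
Editor's aside, not part of the patch: the login.html template above is only shown when a site rejects anonymous fetching. A sketch of the surrounding flow, assuming the fanficdownloader package from this repository; this is simplified from the handler code in main.py below, not the exact code:

    from fanficdownloader import adapters, exceptions

    def fetch_metadata(config, url, login=None, password=None):
        adapter = adapters.getAdapter(config, url)
        if login:
            adapter.username = login
            adapter.password = password
        try:
            # Fails fast on bad URLs, missing stories, or missing credentials.
            return adapter.getStoryMetadataOnly()
        except exceptions.FailedToLogin:
            # The web handler catches this and renders login.html so the user
            # can resubmit the same URL with credentials for that site.
            raise
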
    + + diff --git a/main.py b/main.py index e2625b31..08838957 100644 --- a/main.py +++ b/main.py @@ -47,371 +47,415 @@ from fanficdownloader.zipdir import * from ffstorage import * -from fanficdownloader import adapters, writers +from fanficdownloader import adapters, writers, exceptions import ConfigParser -class LoginRequired(webapp.RequestHandler): - def get(self): - user = users.get_current_user() - if user: - self.redirect('/') - return - else: - logging.debug(users.create_login_url('/')) - url = users.create_login_url(self.request.uri) - template_values = {'login_url' : url} - path = os.path.join(os.path.dirname(__file__), 'index-nonlogin.html') - self.response.out.write(template.render(path, template_values)) - class MainHandler(webapp.RequestHandler): - def get(self): - user = users.get_current_user() - if user: - error = self.request.get('error') - template_values = {'nickname' : user.nickname(), 'authorized': True} - url = self.request.get('url') - template_values['url'] = url - - if error != None and len(error) > 1: - if error == 'login_required': - template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' - elif error == 'bad_url': - template_values['error_message'] = 'Unsupported URL: ' + url - elif error == 'custom': - template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') - - filename = self.request.get('file') - if len(filename) > 1: - template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) - - self.response.headers['Content-Type'] = 'text/html' - path = os.path.join(os.path.dirname(__file__), 'index.html') + def get(self): + user = users.get_current_user() + if user: + error = self.request.get('error') + template_values = {'nickname' : user.nickname(), 'authorized': True} + url = self.request.get('url') + template_values['url'] = url + + if error: + if error == 'login_required': + template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' 
+ elif error == 'bad_url': + template_values['error_message'] = 'Unsupported URL: ' + url + elif error == 'custom': + template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') + elif error == 'configsaved': + template_values['error_message'] = 'Configuration Saved' + + filename = self.request.get('file') + if len(filename) > 1: + template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) + + self.response.headers['Content-Type'] = 'text/html' + path = os.path.join(os.path.dirname(__file__), 'index.html') - self.response.out.write(template.render(path, template_values)) - else: - logging.debug(users.create_login_url('/')) - url = users.create_login_url(self.request.uri) - template_values = {'login_url' : url, 'authorized': False} - path = os.path.join(os.path.dirname(__file__), 'index.html') - self.response.out.write(template.render(path, template_values)) + self.response.out.write(template.render(path, template_values)) + else: + logging.debug(users.create_login_url('/')) + url = users.create_login_url(self.request.uri) + template_values = {'login_url' : url, 'authorized': False} + path = os.path.join(os.path.dirname(__file__), 'index.html') + self.response.out.write(template.render(path, template_values)) +class EditConfigServer(webapp.RequestHandler): + def get(self): + self.post() + + def post(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + template_values = {'nickname' : user.nickname(), 'authorized': True} + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l: + uconfig=l[0] + else: + uconfig=None + + if self.request.get('update'): + if uconfig is None: + uconfig = UserConfig() + uconfig.user = user + uconfig.config = self.request.get('config').encode('utf8')[:1000000] ## just in case. + uconfig.put() + self.redirect("/?error=configsaved") + else: # not update, assume display for edit + if uconfig is not None and uconfig.config: + config = uconfig.config + else: + template_values['default'] = True + configfile = open("defaults.ini","rb") + config = configfile.read() + configfile.close() + template_values['config'] = config + path = os.path.join(os.path.dirname(__file__), 'editconfig.html') + self.response.headers['Content-Type'] = 'text/html' + self.response.out.write(template.render(path, template_values)) + + class FileServer(webapp.RequestHandler): - def get(self): - fileId = self.request.get('id') - - if fileId == None or len(fileId) < 3: - self.redirect('/') - return - - key = db.Key(fileId) - fanfic = db.get(key) + def get(self): + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + return + + key = db.Key(fileId) + fanfic = db.get(key) - # check for completed & failure. - - name = fanfic.name.encode('utf-8') - - #name = urllib.quote(name) - - logging.info("Serving file: %s" % name) + # check for completed & failure. 
+ + name = fanfic.name.encode('utf-8') + + #name = urllib.quote(name) + + logging.info("Serving file: %s" % name) - if name.endswith('.epub'): - self.response.headers['Content-Type'] = 'application/epub+zip' - elif name.endswith('.html'): - self.response.headers['Content-Type'] = 'text/html' - elif name.endswith('.txt'): - self.response.headers['Content-Type'] = 'text/plain' - elif name.endswith('.zip'): - self.response.headers['Content-Type'] = 'application/zip' - else: - self.response.headers['Content-Type'] = 'application/octet-stream' - - self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name + if name.endswith('.epub'): + self.response.headers['Content-Type'] = 'application/epub+zip' + elif name.endswith('.html'): + self.response.headers['Content-Type'] = 'text/html' + elif name.endswith('.txt'): + self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name - data = DownloadData.all().filter("download =", fanfic).order("index") - # epubs are all already compressed. - # Each chunk is compress individually to avoid having - # to hold the whole in memory just for the - # compress/uncompress - if fanfic.format != 'epub': - def dc(data): - try: - return zlib.decompress(data) - # if error, assume it's a chunk from before we started compessing. - except zlib.error: - return data - else: - def dc(data): - return data - - for datum in data: - self.response.out.write(dc(datum.blob)) + data = DownloadData.all().filter("download =", fanfic).order("index") + # epubs are all already compressed. + # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if fanfic.format != 'epub': + def dc(data): + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compessing. 
+ except zlib.error: + return data + else: + def dc(data): + return data + + for datum in data: + self.response.out.write(dc(datum.blob)) class FileStatusServer(webapp.RequestHandler): - def get(self): - user = users.get_current_user() - if not user: - self.redirect(users.create_login_url(self.request.uri)) - return - - fileId = self.request.get('id') - - if fileId == None or len(fileId) < 3: - self.redirect('/') - - key = db.Key(fileId) - fic = db.get(key) + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + key = db.Key(fileId) + fic = db.get(key) - logging.info("Status url: %s" % fic.url) - if fic.completed and fic.format=='epub': - escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+fileId+"&fake=file."+fic.format) - else: - escaped_url=False - template_values = dict(fic = fic, - nickname = user.nickname(), - escaped_url = escaped_url - ) - path = os.path.join(os.path.dirname(__file__), 'status.html') - self.response.out.write(template.render(path, template_values)) - + logging.info("Status url: %s" % fic.url) + if fic.completed and fic.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+fileId+"&fake=file."+fic.format) + else: + escaped_url=False + template_values = dict(fic = fic, + nickname = user.nickname(), + escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + class RecentFilesServer(webapp.RequestHandler): - def get(self): - user = users.get_current_user() - if not user: - self.redirect(users.create_login_url(self.request.uri)) - return - - q = DownloadMeta.all() - q.filter('user =', user).order('-date') - fics = q.fetch(100) - - for fic in fics: - if fic.completed and fic.format == 'epub': - fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) - - template_values = dict(fics = fics, nickname = user.nickname()) - path = os.path.join(os.path.dirname(__file__), 'recent.html') - self.response.out.write(template.render(path, template_values)) - -class RecentAllFilesServer(webapp.RequestHandler): - def get(self): - user = users.get_current_user() - if user.nickname() != 'sigizmund': - return - - fics = db.GqlQuery("Select * From DownloadedFanfic") - template_values = dict(fics = fics, nickname = user.nickname()) - path = os.path.join(os.path.dirname(__file__), 'recent.html') - self.response.out.write(template.render(path, template_values)) + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = DownloadMeta.all() + q.filter('user =', user).order('-date') + fics = q.fetch(100) + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) + + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + class FanfictionDownloader(webapp.RequestHandler): - def get(self): - self.post() + def get(self): + self.post() - def post(self): - logging.getLogger().setLevel(logging.DEBUG) - - user = 
users.get_current_user()
-        if not user:
-            self.redirect(users.create_login_url(self.request.uri))
-            return
-
-        format = self.request.get('format')
-        url = self.request.get('url')
-        login = self.request.get('login')
-        password = self.request.get('password')
-
-        logging.info("Queuing Download: " + url)
+    def post(self):
+        logging.getLogger().setLevel(logging.DEBUG)
+        user = users.get_current_user()
+        if not user:
+            self.redirect(users.create_login_url(self.request.uri))
+            return
+
+        format = self.request.get('format')
+        url = self.request.get('url')
+        login = self.request.get('login')
+        password = self.request.get('password')
+
+        logging.info("Queuing Download: " + url)
 
-        # use existing record if available.
-        q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
-        if( q is None or len(q) < 1 ):
-            download = DownloadMeta()
-        else:
-            download = q[0]
-            download.completed=False
-            download.failure=None
-            for c in download.data_chunks:
-                c.delete()
-
-        download.user = user
-        download.url = url
-        download.format = format
+        # use existing record if available.
+        q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
+        if( q is None or len(q) < 1 ):
+            download = DownloadMeta()
+        else:
+            download = q[0]
+            download.completed=False
+            download.failure=None
+            for c in download.data_chunks:
+                c.delete()
+
+        download.user = user
+        download.url = url
+        download.format = format
 
-        adapter = None
-
-        try:
-            config = ConfigParser.ConfigParser()
-            logging.debug('reading defaults.ini config file, if present')
-            config.read('defaults.ini')
-            logging.debug('reading appengine.ini config file, if present')
-            config.read('appengine.ini')
-            adapter = adapters.getAdapter(config,url)
-            logging.info('Created an adaper: %s' % adapter)
-
-            if len(login) > 1:
-                adapter.username=login
-                adapter.password=password
-            ## This scrapes the metadata, which will be
-            ## duplicated in the queue task, but it
-            ## detects bad URLs, bad login, bad story, etc
-            ## without waiting for the queue.  So I think
-            ## it's worth the double up.  Could maybe save
-            ## it all in the download object someday.
-            story = adapter.getStoryMetadataOnly()
-            download.title = story.getMetadata('title')
-            download.author = story.getMetadata('author')
-            download.put()
+        adapter = None
+        try:
+            config = ConfigParser.SafeConfigParser()
 
-            taskqueue.add(url='/fdowntask',
-                          queue_name="download",
-                          params={'format':format,
-                                  'url':url,
-                                  'login':login,
-                                  'password':password,
-                                  'user':user.email()})
-
-            logging.info("enqueued download key: " + str(download.key()))
+            ## Pull user's config record.
+            l = UserConfig.all().filter('user =', user).fetch(1)
+            if l:
+                uconfig=l[0]
+                logging.debug('reading config from UserConfig')
+                config.readfp(StringIO.StringIO(uconfig.config))
+            else:
+                logging.debug('reading defaults.ini config file')
+                config.read('defaults.ini')
+
+            adapter = adapters.getAdapter(config,url)
+            logging.info('Created an adaper: %s' % adapter)
+
+            if len(login) > 1:
+                adapter.username=login
+                adapter.password=password
+            ## This scrapes the metadata, which will be
+            ## duplicated in the queue task, but it
+            ## detects bad URLs, bad login, bad story, etc
+            ## without waiting for the queue.  So I think
+            ## it's worth the double up.  Could maybe save
+            ## it all in the download object someday.
+            story = adapter.getStoryMetadataOnly()
+            download.title = story.getMetadata('title')
+            download.author = story.getMetadata('author')
+            download.put()
 
-        except Exception, e:
-            logging.exception(e)
-            download.failure = str(e)
-            download.put()
-
-        self.redirect('/status?id='+str(download.key()))
+            taskqueue.add(url='/fdowntask',
+                          queue_name="download",
+                          params={'format':format,
+                                  'url':url,
+                                  'login':login,
+                                  'password':password,
+                                  'user':user.email()})
+
+            logging.info("enqueued download key: " + str(download.key()))
 
-        return
+        except exceptions.FailedToLogin, e:
+            logging.exception(e)
+            download.failure = str(e)
+            download.put()
+            logging.debug('Need to Login, display log in page')
+            template_values = dict(nickname = user.nickname(),
+                                   url = url,
+                                   format = format,
+                                   site = adapter.getSiteDomain(),
+                                   fic = download
+                                   )
+            path = os.path.join(os.path.dirname(__file__), 'login.html')
+            self.response.out.write(template.render(path, template_values))
+            return
+        except Exception, e:
+            logging.exception(e)
+            download.failure = str(e)
+            download.put()
+
+        self.redirect('/status?id='+str(download.key()))
+
+        return
 
 class FanfictionDownloaderTask(webapp.RequestHandler):
-    def _printableVersion(self, text):
-        text = removeEntities(text)
-        try:
-            d = text.decode('utf-8')
-        except:
-            d = text
-        return d
-
+    def _printableVersion(self, text):
+        text = removeEntities(text)
+        try:
+            d = text.decode('utf-8')
+        except:
+            d = text
+        return d
+
-    def post(self):
-        logging.getLogger().setLevel(logging.DEBUG)
-
-        format = self.request.get('format')
-        url = self.request.get('url')
-        login = self.request.get('login')
-        password = self.request.get('password')
-        # User object can't pass, just email address
-        user = users.User(self.request.get('user'))
-
-        logging.info("Downloading: " + url + " for user: "+user.nickname())
-
-        adapter = None
-        writerClass = None
+    def post(self):
+        logging.getLogger().setLevel(logging.DEBUG)
+        format = self.request.get('format')
+        url = self.request.get('url')
+        login = self.request.get('login')
+        password = self.request.get('password')
+        # User object can't pass, just email address
+        user = users.User(self.request.get('user'))
+
+        logging.info("Downloading: " + url + " for user: "+user.nickname())
+
+        adapter = None
+        writerClass = None
 
-        # use existing record if available.
+        # use existing record if available.
+        q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
+        if( q is None or len(q) < 1 ):
+            download = DownloadMeta()
+        else:
+            download = q[0]
+            download.completed=False
+            for c in download.data_chunks:
+                c.delete()
+
+        download.user = user
+        download.url = url
+        download.format = format
+        download.put()
+        logging.info('Creating adapter...')
+
+        try:
+            config = ConfigParser.ConfigParser()
+            config = ConfigParser.SafeConfigParser()
 
-        try:
-            # adapter.getStory() is what does all the heavy lifting.
-            writer = writers.getWriter(format,config,adapter.getStory())
-        except Exception, e:
-            logging.exception(e)
-            download.failure = str(e)
-            download.put()
-            return
-
-        download.name = writer.getOutputFileName()
-        download.title = adapter.getStory().getMetadata('title')
-        download.author = adapter.getStory().getMetadata('author')
-        download.put()
-        index=0
+            ## Pull user's config record.
+            l = UserConfig.all().filter('user =', user).fetch(1)
+            if l:
+                uconfig=l[0]
+                logging.debug('reading config from UserConfig')
+                config.readfp(StringIO.StringIO(uconfig.config))
+            else:
+                logging.debug('reading defaults.ini config file')
+                config.read('defaults.ini')
+
+            adapter = adapters.getAdapter(config,url)
+        except Exception, e:
+            logging.exception(e)
+            download.failure = str(e)
+            download.put()
+            return
+
+        logging.info('Created an adaper: %s' % adapter)
+
+        if len(login) > 1:
+            adapter.username=login
+            adapter.password=password
 
-        outbuffer = StringIO.StringIO()
-        writer.writeStory(outbuffer)
-        data = outbuffer.getvalue()
-        outbuffer.close()
-        del writer
-        del adapter
+        try:
+            # adapter.getStory() is what does all the heavy lifting.
+            writer = writers.getWriter(format,config,adapter.getStory())
+        except Exception, e:
+            logging.exception(e)
+            download.failure = str(e)
+            download.put()
+            return
+
+        download.name = writer.getOutputFileName()
+        download.title = adapter.getStory().getMetadata('title')
+        download.author = adapter.getStory().getMetadata('author')
+        download.put()
+        index=0
 
-        # epubs are all already compressed.
-        # Each chunk is compressed individually to avoid having
-        # to hold the whole in memory just for the
-        # compress/uncompress.
-        if format != 'epub':
-            def c(data):
-                return zlib.compress(data)
-        else:
-            def c(data):
-                return data
-
-        while( len(data) > 0 ):
-            DownloadData(download=download,
-                         index=index,
-                         blob=c(data[:1000000])).put()
-            index += 1
-            data = data[1000000:]
-        download.completed=True
-        download.put()
-
-        logging.info("Download finished OK")
-        return
-
+        outbuffer = StringIO.StringIO()
+        writer.writeStory(outbuffer)
+        data = outbuffer.getvalue()
+        outbuffer.close()
+        del writer
+        del adapter
+
+        # epubs are all already compressed.
+        # Each chunk is compressed individually to avoid having
+        # to hold the whole in memory just for the
+        # compress/uncompress.
+        if format != 'epub':
+            def c(data):
+                return zlib.compress(data)
+        else:
+            def c(data):
+                return data
+
+        while( len(data) > 0 ):
+            DownloadData(download=download,
+                         index=index,
+                         blob=c(data[:1000000])).put()
+            index += 1
+            data = data[1000000:]
+        download.completed=True
+        download.put()
+
+        logging.info("Download finished OK")
+        return
+
 def toPercentDecimal(match):
-    "Return the %decimal number for the character for url escaping"
-    s = match.group(1)
-    return "%%%02x" % ord(s)
+    "Return the %decimal number for the character for url escaping"
+    s = match.group(1)
+    return "%%%02x" % ord(s)
 
 def urlEscape(data):
-    "Escape text, including unicode, for use in URLs"
-    p = re.compile(r'([^\w])')
-    return p.sub(toPercentDecimal, data.encode("utf-8"))
+    "Escape text, including unicode, for use in URLs"
+    p = re.compile(r'([^\w])')
+    return p.sub(toPercentDecimal, data.encode("utf-8"))
 
 def main():
     application = webapp.WSGIApplication([('/', MainHandler),
-                                          ('/fdowntask', FanfictionDownloaderTask),
-                                          ('/fdown', FanfictionDownloader),
-                                          (r'/file.*', FileServer),
-                                          ('/status', FileStatusServer),
-                                          ('/recent', RecentFilesServer),
-                                          ('/r2d2', RecentAllFilesServer),
-                                          ('/login', LoginRequired)],
+                                          ('/fdowntask', FanfictionDownloaderTask),
+                                          ('/fdown', FanfictionDownloader),
+                                          (r'/file.*', FileServer),
+                                          ('/status', FileStatusServer),
+                                          ('/recent', RecentFilesServer),
+                                          ('/editconfig', EditConfigServer),
+                                          ],
                                          debug=False)
     util.run_wsgi_app(application)
 
 if __name__ == '__main__':
-    logging.getLogger().setLevel(logging.DEBUG)
-    main()
+    logging.getLogger().setLevel(logging.DEBUG)
+    main()
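The chunking above works around the datastore's roughly 1MB-per-entity limit: each slice is zlib-compressed on its own, so no single compress or decompress ever needs the whole story in memory. A minimal standalone sketch of the same round trip, detached from DownloadData and the datastore (the helper names here are illustrative, not from the patch):

    import zlib

    CHUNK = 1000000  # mirrors the ~1MB datastore entity limit

    def to_chunks(data, compress=True):
        # each slice is compressed independently, so a chunk can be
        # inflated later without reassembling its neighbours first
        chunks = []
        while len(data) > 0:
            chunks.append(zlib.compress(data[:CHUNK]) if compress else data[:CHUNK])
            data = data[CHUNK:]
        return chunks

    def from_chunks(chunks, compressed=True):
        # concatenating the independently-decompressed chunks
        # restores the original byte string
        return ''.join(zlib.decompress(c) if compressed else c for c in chunks)

    data = 'x' * 2500000
    print from_chunks(to_chunks(data)) == data   # True

The epub special case in the hunk exists because zip data barely shrinks under a second compression pass, so epubs are stored as raw slices.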
diff --git a/newdownload.py b/newdownload.py
index 49c12697..550ad5fd 100644
--- a/newdownload.py
+++ b/newdownload.py
@@ -10,7 +10,7 @@ from fanficdownloader import adapters,writers,exceptions
 
 import ConfigParser
 
-config = ConfigParser.ConfigParser()
+config = ConfigParser.SafeConfigParser()
 
 logging.debug('reading defaults.ini config file, if present')
 config.read('defaults.ini')

From 87a57cc286301823e5068c9ae21eb2b0d59c5b36 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Sat, 7 May 2011 12:51:03 -0500
Subject: [PATCH 142/482] Add (partial) CLI options, fixes to site editconfig.

---
 defaults.ini                               |  2 +-
 fanficdownloader/adapters/adapter_test1.py | 28 +++++--
 fanficdownloader/adapters/base_adapter.py  |  2 +
 fanficdownloader/writers/base_writer.py    |  6 ++
 fanficdownloader/writers/writer_epub.py    |  2 +-
 main.py                                    | 50 ++++++------
 newdownload.py                             | 89 ++++++++++++++--------
 7 files changed, 111 insertions(+), 68 deletions(-)

diff --git a/defaults.ini b/defaults.ini
index ec3f8c41..1d47e81c 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -79,7 +79,7 @@ include_subject_tags: extratags, genre, category, lastupdate, status
 #include_tocpage: false
 
 # epub->mobi conversions typically don't like tables.
-titlepage_use_table: true
+titlepage_use_table: false
 
 ## When using tables, make these span both columns.
 wide_titlepage_entries: description, storyUrl, author URL

diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index 3f4af12e..47f00dbc 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -46,7 +46,10 @@ Some more longer description.  "I suck at summaries!"  "Better than it sounds!"
 ''')
         self.story.setMetadata('datePublished',datetime.date(1972, 01, 31))
         self.story.setMetadata('dateCreated',datetime.datetime.now())
-        self.story.setMetadata('dateUpdated',datetime.date(1975, 01, 31))
+        if self.story.getMetadata('storyId') == '669':
+            self.story.setMetadata('dateUpdated',datetime.datetime.now())
+        else:
+            self.story.setMetadata('dateUpdated',datetime.date(1975, 01, 31))
 
         self.story.setMetadata('numChapters','5')
         self.story.setMetadata('numWords','123456')
         self.story.setMetadata('status','In-Completed')
@@ -75,15 +78,23 @@ Some more longer description.  "I suck at summaries!"  "Better than it sounds!"
 
     def getChapterText(self, url):
-#         return "

    Really short chapter

    " -# return u''' -#

    “It might be all it takes.” He held out his hand and shook Wilfred’s, he glanced at the Vinvocci woman as she knelt there cradling the body of her partner, and he said not a word.

    Disclaimer: I don't own Harry Potter or the craziness of Romilda Vane.

    *EDIT* Romilda is in her 4th year, like she always has.

    Thanks xxSkitten for Beta reading this! :D

    Full Summary: Harry and Ginny are together. Romilda Vane is not happy. She can't stand seeing the guy she wants to be with the person she deserves to be with, with another girl - especially a girl younger that is far less pretty than her. She orders 100 Love potions from Weasley's Wizard Wheezes, Wonder Witch line. Several get to undesired targets, such as Ron Weasley. What happens when Ginny takes matters into her own hands?


    Romilda Vane (3rd Person)

    "Th-Tha-That little skank!" snarled Romilda Vane as she watched Harry Potter and Ginny Weasley from the balcony overlooking the common room.

    "Romilda," said Abigail Stones, one of her friends, "Lets go, you don't need to watch this."

    Abigail stones had long, sleek black hair that was always in a high ponytail. She had pale skin that very few blemishes. She had a long, blocky nose and a small mouth. Her hazel eyes were behind think horned rimmed glasses, and her uniform was in order without a crease or wrinkle in sight.

    "What does he see in her?" Romilda snarled in a whisper, her eyes upon the red-headed fifth year. "I mean, she's all freckle-y and gingery, she's a filthy fifth year-"

    "And you're a fourth year!" Abigail interjected, but Romilda just kept on ranting.

    "…and I heard they live in a dump!" Her nostrils flared.

    "Well what are you going to do about it, just sit and watch them all the time?" Piped up Charlotte Henderson, the second of Romilda's present friends. She had curly shoulder length blonde hair and wore a thick layer of make up to cover up her various large red pimples. Her eyes were dark blue and were surrounded with large clumpy eyelashes. She had an eager expression, like she was witnessing two people on a Muggle drama who were about to kiss.

    "Of course not!" She said, looking away as Ginny kissed Harry. "I've ordered one-hundred love potions from that Wonder Witch line from Weasley's Wizard Wheezes, so once I get him in my grasp I'll have him for the rest of the year!"

    "You realize," Abigail said, rolling her eyes slightly. "That with your luck, you'll get every guy in the school but him."

    "It will only be for around an hour, and I could always just make him jealous by making every guy close to him fall in love with me."

    Abigail sighed, "One, he has a girlfriend. Two, you already got his best friend and he wasn't jealous, he was pissed, and three, you'll get expelled before you can get to him."

    "Sometimes I wonder how we're friends!" Romilda snapped at Abigail.

    "We're friends because you need a good influence around you, or you would be as crazy as Peeves." Abigail stated.

    Romilda spun around to glare at her friend, knowing Abigail was right but did not daring to admit it.

    The silence was broken by Charlotte. "So how are you going to slip him the potion?" She asked, honestly interested.

    "Just wait 'till morning, and you'll see." Romilda said, looking back down at Harry, then suddenly realizing Ginny wasn't there.

    Then, Ginny appeared next to them. She stalked through their group, not looking at any of them. She stopped at the girl's dorm door and turned her head slightly to see them from the corner of her eye.

    "One-hundred? You're that desperate?" Ginny said with a mix of humor and anger. Then, the red-head turned to the door and left them all in a surprised state.

    "You're screwed." Abigail said matter-of-factly. She went into the dorm without another word.

    "She can be so insensitive." Charlotte said, looking where Abigail had left while shaking her head.

    "You can say that again," mumbled Romilda, downcast.

    "She can be-" Charlotte began again, but Romilda held her hand up.

    "That was a figure of speech, pea-brain." She snapped. "Sometimes you can be as dumb as that Loony Lovegood." She then stalked up to her room with one last pleading look at Harry, whispering fiercely under her breath.

    "You will be mine…"


    Isn't Romilda Pleasant? ;] xD Oh she's crazy, insane, envious, has stalkerish and man stealing tendencies. and that's why she's everyone's FAVORITE character.

    Also Romilda's in her fourth year. yeah. oh an NO FEMSLASH geez.

    Also, Abigail Stones and Charlotte Henderson are to OC's that i made up on the spot because even crazies need friends. Ones the ignored good influence and ones a stereotypical dumb 'blonde' (NO OFFENSE TO BLONDES! I'm blonde and I don't take those things that personally unless their clearly mean that way. Also Charlotte's Muggle-Born so she watches all those Muggle TV's shows were all addicted too. ;] .. )

    The rest of the story will be in Ginny's point of view whether its 1st or 3rd Person IDK yet but probably 1st person. The pairing in this are - Harry x Ginny / Romilda x Harry / Ron x Hermione (hints of) / Charolette x OC (Undetermined).

    Reviews = Something... GOOD!

    ~ Sincerely MNM

    - -#
'''
         if self.story.getMetadata('storyId') == '667':
             raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)
 
-        soup = bs.BeautifulStoneSoup(u'''
+        if "chapter=1" in url :
+            text=u'''
+
+Prologue
+
+This is a fake adapter for testing purposes. Different storyId's will give different errors:
+
+http://test1.com?sid=666 - raises StoryDoesNotExist
+
+http://test1.com?sid=667 - raises FailedToDownload on chapter 1
+
+http://test1.com?sid=668 - raises FailedToLogin unless username='Me'
+
+http://test1.com?sid=669 - Succeeds with Updated Date=now
+
+And other storyId will succeed with the same output.
+'''
+        else:
+            text=u'''

    Chapter

    Centered text

    @@ -96,7 +107,8 @@ horizontal rules

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-''',selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+'''
+        soup = bs.BeautifulStoneSoup(text,selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
 
         return utf8FromSoup(soup)
 
 def getClass():

diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index 67c0033a..f4c012c2 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -27,6 +27,8 @@ class BaseSiteAdapter(Configurable):
     def __init__(self, config, url):
         Configurable.__init__(self, config)
         self.addConfigSection(self.getSiteDomain())
+        self.addConfigSection("commandline")
+
         self.opener = u2.build_opener(u2.HTTPCookieProcessor())
         self.storyDone = False
         self.metadataDone = False

diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py
index 7e8d13c3..47024e4e 100644
--- a/fanficdownloader/writers/base_writer.py
+++ b/fanficdownloader/writers/base_writer.py
@@ -24,6 +24,8 @@ class BaseStoryWriter(Configurable):
     def __init__(self, config, story):
         Configurable.__init__(self, config)
         self.addConfigSection(self.getFormatName())
+        ## Pass adapter instead, to check date before fetching all?
+        ## Or add 'check update' method to writer?
         self.story = story
         self.titleLabels = {
             'category':'Category',
@@ -127,6 +129,8 @@ class BaseStoryWriter(Configurable):
     def writeStory(self,outstream=None):
         self.addConfigSection(self.story.getMetadata('site'))
         self.addConfigSection(self.story.getMetadata('site')+":"+self.getFormatName())
+        self.addConfigSection("commandline")
+
         for tag in self.getConfigList("extratags"):
             self.story.addToList("extratags",tag)
@@ -148,6 +152,8 @@ class BaseStoryWriter(Configurable):
                 path+=dir+"/"
                 if not os.path.exists(path):
                     os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2?
+
+            ## Check for output file date vs updated date here?
             outstream = open(outfilename,"wb")
         else:
             close=False
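The addConfigSection() calls being added here are how option precedence gets layered: [defaults] first, then format, site, site:format, and finally the new commandline section, with later sections winning. A rough sketch of that lookup order under a Configurable-like wrapper (get_config here is a hypothetical helper, not the project's actual class):

    import ConfigParser

    def get_config(config, sections, key, default=None):
        # later sections win: walk the list backwards and return the
        # first section that actually defines the key
        for section in reversed(sections):
            if config.has_section(section) and config.has_option(section, key):
                return config.get(section, key)
        return default

    config = ConfigParser.SafeConfigParser()
    config.add_section('defaults')
    config.set('defaults', 'titlepage_use_table', 'false')
    config.add_section('commandline')
    config.set('commandline', 'titlepage_use_table', 'true')

    sections = ['defaults', 'epub', 'test1.com', 'test1.com:epub', 'commandline']
    print get_config(config, sections, 'titlepage_use_table')   # -> true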

diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index 52f18dcb..e06c4140 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -57,7 +57,7 @@ h6 { text-align: center; }
-${title} by ${author}
+${title} by ${author}
 ''')

diff --git a/main.py b/main.py
index 08838957..9bf69855 100644
--- a/main.py
+++ b/main.py
@@ -108,7 +108,7 @@ class EditConfigServer(webapp.RequestHandler):
             if uconfig is None:
                 uconfig = UserConfig()
             uconfig.user = user
-            uconfig.config = self.request.get('config').encode('utf8')[:1000000] ## just in case.
+            uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case.
             uconfig.put()
             self.redirect("/?error=configsaved")
         else: # not update, assume display for edit
@@ -222,8 +222,25 @@ class RecentFilesServer(webapp.RequestHandler):
         template_values = dict(fics = fics, nickname = user.nickname())
         path = os.path.join(os.path.dirname(__file__), 'recent.html')
         self.response.out.write(template.render(path, template_values))
+
+class UserConfigServer(webapp.RequestHandler):
+    def getUserConfig(self,user):
+        config = ConfigParser.SafeConfigParser()
+
+        ## Pull user's config record.
+        l = UserConfig.all().filter('user =', user).fetch(1)
+        ## TEST THIS
+        if l and l[0].config:
+            uconfig=l[0]
+            logging.debug('reading config from UserConfig(%s)'%uconfig.config)
+            config.readfp(StringIO.StringIO(uconfig.config))
+        else:
+            logging.debug('reading defaults.ini config file')
+            config.read('defaults.ini')
+
+        return config
 
-class FanfictionDownloader(webapp.RequestHandler):
+class FanfictionDownloader(UserConfigServer):
     def get(self):
         self.post()
 
@@ -258,18 +275,7 @@ class FanfictionDownloader(webapp.RequestHandler):
         adapter = None
 
         try:
-            config = ConfigParser.SafeConfigParser()
-
-            ## Pull user's config record.
-            l = UserConfig.all().filter('user =', user).fetch(1)
-            if l:
-                uconfig=l[0]
-                logging.debug('reading config from UserConfig')
-                config.readfp(StringIO.StringIO(uconfig.config))
-            else:
-                logging.debug('reading defaults.ini config file')
-                config.read('defaults.ini')
-
+            config = self.getUserConfig(user)
             adapter = adapters.getAdapter(config,url)
             logging.info('Created an adaper: %s' % adapter)
 
@@ -321,7 +327,7 @@ class FanfictionDownloader(webapp.RequestHandler):
 
         return
 
-class FanfictionDownloaderTask(webapp.RequestHandler):
+class FanfictionDownloaderTask(UserConfigServer):
     def _printableVersion(self, text):
         text = removeEntities(text)
         try:
@@ -362,19 +368,7 @@ class FanfictionDownloaderTask(webapp.RequestHandler):
         logging.info('Creating adapter...')
 
         try:
-            config = ConfigParser.ConfigParser()
-            config = ConfigParser.SafeConfigParser()
-
-            ## Pull user's config record.
-            l = UserConfig.all().filter('user =', user).fetch(1)
-            if l:
-                uconfig=l[0]
-                logging.debug('reading config from UserConfig')
-                config.readfp(StringIO.StringIO(uconfig.config))
-            else:
-                logging.debug('reading defaults.ini config file')
-                config.read('defaults.ini')
-
+            config = self.getUserConfig(user)
             adapter = adapters.getAdapter(config,url)
         except Exception, e:
             logging.exception(e)
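The getUserConfig() helper being factored out here layers a user's saved ini text over defaults.ini by feeding the stored string through StringIO. The same trick in isolation (standalone sketch; the stored ini content is made up):

    import ConfigParser
    import StringIO

    stored = "[defaults]\ninclude_tocpage: true\n"   # e.g. text from a UserConfig record

    config = ConfigParser.SafeConfigParser()
    config.read('defaults.ini')                      # base values, if the file exists
    config.readfp(StringIO.StringIO(stored))         # stored text layers on top
    print config.get('defaults', 'include_tocpage')  # -> true

read() silently skips missing files while readfp() takes any file-like object, which is what makes a datastore-backed string usable as a config source.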
+ print "format: %s" % options.format + writeStory(config,adapter,"epub") + writeStory(config,adapter,"html") + writeStory(config,adapter,"txt") + del adapter + + except exceptions.InvalidStoryURL, isu: + print isu + except exceptions.StoryDoesNotExist, dne: + print dne + except exceptions.UnknownSite, us: + print us + +if __name__ == "__main__": + main() From ecd1690b7023e12a6670046d0050950c4cd8ee62 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 7 May 2011 15:14:46 -0500 Subject: [PATCH 143/482] Add fictionpress.com adapter, some more tweak/fixes. --- defaults.ini | 10 +- .../adapters/adapter_fanfictionnet.py | 3 +- .../adapters/adapter_fictionpresscom.py | 168 ++++++++++++++++++ fanficdownloader/writers/writer_epub.py | 14 +- newdownload.py | 11 +- 5 files changed, 193 insertions(+), 13 deletions(-) create mode 100644 fanficdownloader/adapters/adapter_fictionpresscom.py diff --git a/defaults.ini b/defaults.ini index 1d47e81c..ee6b561f 100644 --- a/defaults.ini +++ b/defaults.ini @@ -98,6 +98,10 @@ extratags: FanFiction,Testing,Text [test1.com:html] extratags: FanFiction,Testing,HTML +[www.whofic.com] + +[www.fanfiction.net] + [www.twilighted.net] ## Some sites require login (or login for some rated stories) ## The program can prompt you, or you can save it in config. @@ -105,7 +109,7 @@ extratags: FanFiction,Testing,HTML #username:YourName #password:yourpassword -[www.whofic.com] - -[www.fanfiction.net] +[www.fictionpress.com] +## Clear FanFiction from defaults, fictionpress.com is original fiction. +extratags: diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 0ef69765..c0419953 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -144,7 +144,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # after Rating, the same bit of text containing id:123456 contains # Complete--if completed. - if 'Complete' in a.findNext(text=re.compile(r'id:\d+')): + if 'Complete' in a.findNext(text=re.compile(r'id:'+self.story.getMetadata('storyId'))): self.story.setMetadata('status', 'Completed') else: self.story.setMetadata('status', 'In-Progress') @@ -181,7 +181,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): return utf8FromSoup(span) -#_register_handler(FanFictionNetSiteAdapter) def getClass(): return FanFictionNetSiteAdapter diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py new file mode 100644 index 00000000..9ee2c50c --- /dev/null +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- + +import time +import datetime +import logging +import re +import urllib2 +import time + +import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup + +class FictionPressComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fpcom') + + # get storyId from url--url validation guarantees second part is storyId + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + + # normalized story URL. 
From ecd1690b7023e12a6670046d0050950c4cd8ee62 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Sat, 7 May 2011 15:14:46 -0500
Subject: [PATCH 143/482] Add fictionpress.com adapter, some more tweaks/fixes.

---
 defaults.ini                                 |  10 +-
 .../adapters/adapter_fanfictionnet.py        |   3 +-
 .../adapters/adapter_fictionpresscom.py      | 168 ++++++++++++++++++
 fanficdownloader/writers/writer_epub.py      |  14 +-
 newdownload.py                               |  11 +-
 5 files changed, 193 insertions(+), 13 deletions(-)
 create mode 100644 fanficdownloader/adapters/adapter_fictionpresscom.py

diff --git a/defaults.ini b/defaults.ini
index 1d47e81c..ee6b561f 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -98,6 +98,10 @@ extratags: FanFiction,Testing,Text
 [test1.com:html]
 extratags: FanFiction,Testing,HTML
 
+[www.whofic.com]
+
+[www.fanfiction.net]
+
 [www.twilighted.net]
 ## Some sites require login (or login for some rated stories)
 ## The program can prompt you, or you can save it in config.
 #username:YourName
 #password:yourpassword
 
-[www.whofic.com]
-
-[www.fanfiction.net]
+[www.fictionpress.com]
+## Clear FanFiction from defaults, fictionpress.com is original fiction.
+extratags:

diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py
index 0ef69765..c0419953 100644
--- a/fanficdownloader/adapters/adapter_fanfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@@ -144,7 +144,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 
         # after Rating, the same bit of text containing id:123456 contains
         # Complete--if completed.
-        if 'Complete' in a.findNext(text=re.compile(r'id:\d+')):
+        if 'Complete' in a.findNext(text=re.compile(r'id:'+self.story.getMetadata('storyId'))):
             self.story.setMetadata('status', 'Completed')
         else:
             self.story.setMetadata('status', 'In-Progress')
@@ -181,7 +181,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
 
         return utf8FromSoup(span)
 
-#_register_handler(FanFictionNetSiteAdapter)
 
 def getClass():
     return FanFictionNetSiteAdapter

diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py
new file mode 100644
index 00000000..9ee2c50c
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fictionpresscom.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+
+import time
+import datetime
+import logging
+import re
+import urllib2
+import time
+
+import fanficdownloader.BeautifulSoup as bs
+import fanficdownloader.exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup
+
+class FictionPressComSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fpcom')
+
+        # get storyId from url--url validation guarantees second part is storyId
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+
+        # normalized story URL.
+        self._setURL("http://"+self.getSiteDomain()\
+                         +"/s/"+self.story.getMetadata('storyId')+"/1/")
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.fictionpress.com'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return ['www.fictionpress.com']
+
+    def getSiteExampleURLs(self):
+        return "http://www.fictionpress.com/s/1234/1/ http://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title"
+
+    def getSiteURLPattern(self):
+        return r"http://www\.fictionpress\.com/s/\d+/\d+(/|/[a-zA-Z0-9_]+)?$"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        # fetch the chapter.  From that we will get metadata and chapter list
+        # You'd think this would be very similar to ffnet.  But you'd be wrong.
+
+        url = self.url
+        logging.debug("URL: "+url)
+        logging.debug('Getting metadata from: %s' % url)
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        try:
+            soup = bs.BeautifulSoup(self._fetchUrl(url))
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"^/u/\d+"))
+        self.story.setMetadata('authorId',a['href'].split('/')[2])
+        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Starr and Temple Detective Agency, a Sci-Fi fanfic - FictionPress.com
+        title = soup.find('title')
+        m = re.match(r"^(.*?), a (.*?) fanfic - FictionPress.com",title.string)
+        title,category = m.groups()
+        self.story.setMetadata('title', title)
+        self.story.addToList('category',category)
+
+        # Find the chapter selector
+        select = soup.find('select', { 'name' : 'chapter' } )
+
+        if select is None:
+            # no selector found, so it's a one-chapter story.
+            self.chapterUrls.append((self.story.getMetadata('title'),url))
+        else:
+            allOptions = select.findAll('option')
+            for o in allOptions:
+                url = u'http://%s/s/%s/%s/' % ( self.getSiteDomain(),
+                                                self.story.getMetadata('storyId'),
+                                                o['value'])
+                # just in case there's tags, like in chapter titles.
+                title = u"%s" % o
+                title = re.sub(r'<[^>]+>','',title)
+                self.chapterUrls.append((title,url))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## Pull some additional data from html.
+
+        # Find Rating and look around it.
+        a = soup.find('a', href=re.compile(r'^http://www.fictionratings.com'))
+        # "Fiction Rated: K+"
+        self.story.setMetadata('rating',a.string.split()[-1])
+
+        # after Rating, the same bit of text containing id:123456 contains
+        # Complete--if completed, and Published/Updated dates.
+        # - Published: 02-07-11 - Updated: 02-07-11 - Complete - id:2889508
+        dataline = a.findNext(text=re.compile(r'id:'+self.story.getMetadata('storyId')))
+        if dataline:
+            if 'Complete' in dataline:
+                self.story.setMetadata('status', 'Completed')
+            else:
+                self.story.setMetadata('status', 'In-Progress')
+
+            m = re.match(r".*?Published: ([0-9-]+) - Updated: ([0-9-]+).*?",dataline)
+            if m:
+                published,updated = m.groups()
+                self.story.setMetadata('datePublished',
+                                       datetime.datetime.fromtimestamp(time.mktime(time.strptime(published, '%m-%d-%y'))))
+                self.story.setMetadata('dateUpdated',
+                                       datetime.datetime.fromtimestamp(time.mktime(time.strptime(updated, '%m-%d-%y'))))
+
+        # category, genres, then desc.
+        #
+        # Parse genre(s) and description from the meta description tag.
+        m = re.match(r"^(?P<category>.*?), (?P<genres>.*?), (?P<desc>.*?)$",
+                     soup.find('meta',{'name':'description'})['content'])
+        if m != None:
+            self.story.setMetadata('description', m.group('desc'))
+            genres=m.group('genres')
+            # Hurt/Comfort is one genre.
+            genres=re.sub('Hurt/Comfort','Hurt-Comfort',genres)
+            for g in genres.split('/'):
+                self.story.addToList('genre',g)
+
+        # Number of words only on author page.
+        # status, category, etc could also be parsed from here, but this way the one
+        # off-page hit is isolated.
+        logging.debug('Getting more metadata from: %s' % self.story.getMetadata('authorUrl'))
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
+                                     selfClosingTags=('br')) # normalize br tags to self-closing
+        # Find the link for this story.
+        a = soup.find('a', href=re.compile(r'^/s/'+self.story.getMetadata('storyId')+'/'))
+        # Find the 'data line' after it.
+        # Complete - Sci-Fi - Fiction Rated: T - English - Suspense/Hurt/Comfort - Chapters: 1 - Words: 2,762 - Reviews: 2 - Updated: 2-7-11 - Published: 2-7-11
+        dataline = a.findNext(text=re.compile(r'Words: '))
+        if dataline:
+            m = re.match(r".*?Words: ([0-9,]+).*?",dataline)
+            if m:
+                words = m.group(1)
+                self.story.setMetadata('numWords',words)
+
+        return
+
+
+    def getChapterText(self, url):
+        logging.debug('Getting chapter text from: %s' % url)
+        time.sleep(0.5) ## ffnet(and, I assume, fpcom) tends to fail
+                        ## more if hit too fast.  This is in
+                        ## addition to whatever the
+                        ## slow_down_sleep_time setting is.
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        span = soup.find('div', {'id' : 'storytext'})
+
+        if None == span:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)
+
+        return utf8FromSoup(span)
+
+def getClass():
+    return FictionPressComSiteAdapter
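The Published/Updated handling above hands its regex groups to time.strptime with a '%m-%d-%y' pattern and converts via mktime. Isolated, the conversion looks like this (a sketch with a made-up dataline in the site's format):

    import datetime
    import re
    import time

    dataline = "Published: 02-07-11 - Updated: 02-07-11 - Complete - id:2889508"
    m = re.match(r".*?Published: ([0-9-]+) - Updated: ([0-9-]+).*?", dataline)
    if m:
        published, updated = m.groups()
        # strptime parses the site's MM-DD-YY text; mktime/fromtimestamp
        # round-trip it into a datetime object
        date = datetime.datetime.fromtimestamp(
            time.mktime(time.strptime(published, '%m-%d-%y')))
        print date   # 2011-02-07 00:00:00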
print "format: %s" % options.format writeStory(config,adapter,"epub") From d90c9ebceddfafc3e6cd32e7627e0975a53c8e3a Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 8 May 2011 21:53:06 -0500 Subject: [PATCH 144/482] Support for ficwad.com. Further tweaks/improvments, especially to user config. (I'm getting a bit carried away with making things configurable, honestly.) --- defaults.ini | 144 +++++++----- editconfig.html | 10 +- example.ini | 29 +++ .../adapters/adapter_fictionpresscom.py | 8 +- .../adapters/adapter_ficwadcom.py | 206 ++++++++++++++++++ .../adapters/adapter_twilightednet.py | 27 ++- fanficdownloader/adapters/base_adapter.py | 25 ++- fanficdownloader/htmlcleanup.py | 2 +- fanficdownloader/writers/__init__.py | 4 + fanficdownloader/writers/base_writer.py | 42 +++- fanficdownloader/writers/writer_epub.py | 2 +- ffstorage.py | 2 +- index.html | 11 +- main.py | 15 +- newdownload.py | 13 +- 15 files changed, 443 insertions(+), 97 deletions(-) create mode 100644 example.ini create mode 100644 fanficdownloader/adapters/adapter_ficwadcom.py diff --git a/defaults.ini b/defaults.ini index ee6b561f..d825058a 100644 --- a/defaults.ini +++ b/defaults.ini @@ -1,43 +1,64 @@ [defaults] ## [defaults] section applies to all formats and sites but may be -## overridden. +## overridden at several levels -# All available titlepage_entries: -# category -# genre -# status -# datePublished -# dateUpdated -# dateCreated -# rating -# warnings -# numChapters -# numWords -# site -# siteabbrev -# author -# authorId -# authorURL -# title -# storyId -# storyUrl -# extratags -# description -# formatname -# formatext +## All available titlepage_entries and the label used for them: +## _label:
    @@ -56,6 +55,13 @@
+
+
+Default System configuration
+
+{{ defaultsini }}
+
+
[!!] [R] [V] [Y]
+            spanreq = metap.find("span",{"class":"req"})
+            for a in spanreq.findAll("a"):
+                self.story.addToList('warnings',a['title'])
+
+        ## perhaps not the most efficient way to parse this, using
+        ## regexps for each rather than something more complex, but
+        ## IMO, it's more readable and amenable to change.
+        metapstr = stripHTML(str(metap)).replace('\n',' ').replace('\t','')
+        #print "metap: (%s)"%metapstr
+
+        m = re.match(r".*?Rating: (.+?) -.*?",metapstr)
+        if m:
+            self.story.setMetadata('rating', m.group(1))
+
+        m = re.match(r".*?Genres: (.+?) -.*?",metapstr)
+        if m:
+            for g in m.group(1).split(','):
+                self.story.addToList('genre',g)
+
+        m = re.match(r".*?Published: ([0-9/]+?) -.*?",metapstr)
+        if m:
+            self.story.setMetadata('datePublished',
+                                   datetime.datetime.fromtimestamp(\
+                    time.mktime(time.strptime(m.group(1), "%Y/%m/%d"))))
+
+        # Updated can have more than one space after it.
+        m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metapstr)
+        if m:
+            self.story.setMetadata('dateUpdated',
+                                   datetime.datetime.fromtimestamp(\
+                    time.mktime(time.strptime(m.group(1), "%Y/%m/%d"))))
+
+        m = re.match(r".*? - ([0-9/]+?) words.*?",metapstr)
+        if m:
+            self.story.setMetadata('numWords',m.group(1))
+
+        if metapstr.endswith("Complete"):
+            self.story.setMetadata('status', 'Completed')
+        else:
+            self.story.setMetadata('status', 'In-Progress')
+
+        # get the chapter list first this time because that's how we
+        # detect the need to login.
+        storylistul = soup.find('ul',{'id':'storylist'})
+        if not storylistul:
+            # no list found, so it's a one-chapter story.
+            self.chapterUrls.append((self.story.getMetadata('title'),url))
+        else:
+            chapterlistlis = storylistul.findAll('li')
+            for chapterli in chapterlistlis:
+                if "blocked" in chapterli['class']:
+                    # paranoia check.  We should already be logged in by now.
+                    raise exceptions.FailedToLogin(url,self.username)
+                else:
+                    #print "chapterli.h4.a (%s)"%chapterli.h4.a
+                    self.chapterUrls.append((chapterli.h4.a.string,
+                                             u'http://%s%s'%(self.getSiteDomain(),
+                                                             chapterli.h4.a['href'])))
+        #print "self.chapterUrls:%s"%self.chapterUrls
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        return
+
+
+    def getChapterText(self, url):
+        logging.debug('Getting chapter text from: %s' % url)
+        time.sleep(0.5) ## ffnet tends to fail more if hit too fast.
+                        ## This is in addition to whatever the
+                        ## slow_down_sleep_time setting is.
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        span = soup.find('div', {'id' : 'storytext'})
+
+        if None == span:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)
+
+        return utf8FromSoup(span)
+
+def getClass():
+    return FicwadComSiteAdapter

diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py
index 751fb9c3..3a8e14c8 100644
--- a/fanficdownloader/adapters/adapter_twilightednet.py
+++ b/fanficdownloader/adapters/adapter_twilightednet.py
@@ -54,28 +54,27 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
         return False
 
     def performLogin(self, url):
-        data = {}
+        params = {}
 
         if self.password:
-            data['penname'] = self.username
-            data['password'] = self.password
+            params['penname'] = self.username
+            params['password'] = self.password
         else:
-            data['penname'] = self.getConfig("username")
-            data['password'] = self.getConfig("password")
-        data['cookiecheck'] = '1'
-        data['submit'] = 'Submit'
+            params['penname'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+        params['cookiecheck'] = '1'
+        params['submit'] = 'Submit'
 
-        urlvals = urllib.urlencode(data)
         loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
         logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
-                                                              data['penname']))
+                                                              params['penname']))
 
-        d = self._fetchUrl(loginUrl, urlvals)
+        d = self._fetchUrl(loginUrl, params)
 
         if "Member Account" not in d : #Member Account
             logging.info("Failed to login to URL %s as %s" % (loginUrl,
-                                                              data['penname']))
-            raise exceptions.FailedToLogin(url,data['penname'])
+                                                              params['penname']))
+            raise exceptions.FailedToLogin(url,params['penname'])
             return False
         else:
             return True
@@ -150,10 +149,10 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
             self.story.setMetadata('description',stripHTML(svalue))
 
         if 'Rated' in label:
-            self.story.setMetadata('rating', value.strip())
+            self.story.setMetadata('rating', value)
 
         if 'Word count' in label:
-            self.story.setMetadata('numWords', value.strip())
+            self.story.setMetadata('numWords', value)
 
         if 'Categories' in label:
             cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))

diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index f4c012c2..a4fb7290 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -3,6 +3,7 @@
 import re
 import datetime
 import time
+import urllib
 import urllib2 as u2
 import urlparse as up
 
@@ -27,7 +28,7 @@ class BaseSiteAdapter(Configurable):
     def __init__(self, config, url):
         Configurable.__init__(self, config)
         self.addConfigSection(self.getSiteDomain())
-        self.addConfigSection("commandline")
+        self.addConfigSection("overrides")
 
         self.opener = u2.build_opener(u2.HTTPCookieProcessor())
         self.storyDone = False
@@ -49,12 +50,30 @@ class BaseSiteAdapter(Configurable):
         self.host = self.parsedUrl.netloc
         self.path = self.parsedUrl.path
         self.story.setMetadata('storyUrl',self.url)
-
+
+    # Assumes application/x-www-form-urlencoded.  parameters, headers are dict()s
+    def _postUrl(self, url, parameters={}, headers={}):
+        if self.getConfig('slow_down_sleep_time'):
+            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+
+        ## u2.Request assumes POST when data!=None.  Also assumes data
+        ## is application/x-www-form-urlencoded.
+        if 'Content-type' not in headers:
+            headers['Content-type']='application/x-www-form-urlencoded'
+        if 'Accept' not in headers:
+            headers['Accept']="text/html,*/*"
+        req = u2.Request(url,
+                         data=urllib.urlencode(parameters),
+                         headers=headers)
+        return self.opener.open(req).read().decode(self.decode)
+
+    # parameters is a dict()
     def _fetchUrl(self, url, parameters=None):
         if self.getConfig('slow_down_sleep_time'):
             time.sleep(float(self.getConfig('slow_down_sleep_time')))
         if parameters:
-            return self.opener.open(url,parameters).read().decode(self.decode)
+            return self.opener.open(url,urllib.urlencode(parameters))\
+                .read().decode(self.decode)
         else:
             return self.opener.open(url).read().decode(self.decode)
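The new _postUrl() builds a urllib2 POST by hand: urlencode the parameter dict, default the Content-type header, and let Request infer POST from data being non-None. Stripped of the Configurable and opener plumbing, the same call looks like this (a sketch; the URL and form fields are placeholders, not a real login endpoint):

    import urllib
    import urllib2

    def post_url(url, parameters, headers=None):
        headers = dict(headers or {})
        # u2.Request treats data != None as a POST
        headers.setdefault('Content-type', 'application/x-www-form-urlencoded')
        headers.setdefault('Accept', 'text/html,*/*')
        req = urllib2.Request(url, data=urllib.urlencode(parameters), headers=headers)
        return urllib2.urlopen(req).read()

    # e.g. post_url('http://www.example.com/user.php?action=login',
    #               {'penname': 'name', 'password': 'pw', 'submit': 'Submit'})

Note the twilighted change in the same patch: _fetchUrl now urlencodes the dict itself, so callers pass plain dicts instead of pre-encoded strings.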
diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py
index b02cab2b..8ff3722a 100644
--- a/fanficdownloader/htmlcleanup.py
+++ b/fanficdownloader/htmlcleanup.py
@@ -26,7 +26,7 @@ def stripHTML(soup):
 
 def conditionalRemoveEntities(value):
     if isinstance(value,str) or isinstance(value,unicode) :
-        return removeEntities(value.strip())
+        return removeEntities(value).strip()
     else:
         return value

diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py
index 339680ea..28575e0d 100644
--- a/fanficdownloader/writers/__init__.py
+++ b/fanficdownloader/writers/__init__.py
@@ -3,6 +3,8 @@
 ## This could (should?) use a dynamic loader like adapters, but for
 ## now, it's static, since there's so few of them.
 
+from fanficdownloader.exceptions import FailedToDownload
+
 from writer_html import HTMLWriter
 from writer_txt import TextWriter
 from writer_epub import EpubWriter
@@ -14,3 +16,5 @@ def getWriter(type,config,story):
         return TextWriter(config,story)
     if type == "epub":
         return EpubWriter(config,story)
+
+    raise FailedToDownload("(%s) is not a supported download format."%type)

diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py
index 47024e4e..64dcd4a7 100644
--- a/fanficdownloader/writers/base_writer.py
+++ b/fanficdownloader/writers/base_writer.py
@@ -27,6 +27,31 @@ class BaseStoryWriter(Configurable):
         ## Pass adapter instead, to check date before fetching all?
         ## Or add 'check update' method to writer?
         self.story = story
+        self.validEntries = [
+            'category',
+            'genre',
+            'status',
+            'datePublished',
+            'dateUpdated',
+            'dateCreated',
+            'rating',
+            'warnings',
+            'numChapters',
+            'numWords',
+            'site',
+            'storyId',
+            'authorId',
+            'extratags',
+            'title',
+            'storyUrl',
+            'description',
+            'author',
+            'authorUrl',
+            'formatname',
+            'formatext',
+            'siteabbrev']
+
+        # fall back labels.
         self.titleLabels = {
             'category':'Category',
             'genre':'Genre',
@@ -38,7 +63,7 @@ class BaseStoryWriter(Configurable):
             'warnings':'Warnings',
             'numChapters':'Chapters',
             'numWords':'Words',
-            'site':'Publisher',
+            'site':'Site',
             'storyId':'Story ID',
             'authorId':'Author ID',
             'extratags':'Extra Tags',
@@ -49,6 +74,7 @@ class BaseStoryWriter(Configurable):
             'authorUrl':'Author URL',
             'formatname':'File Format',
             'formatext':'File Extension',
+            'siteabbrev':'Site Abbrev'
             }
         self.story.setMetadata('formatname',self.getFormatName())
         self.story.setMetadata('formatext',self.getFormatExt())
@@ -61,17 +87,15 @@ class BaseStoryWriter(Configurable):
 
     def getFileName(self,template,extension="${formatext}"):
         values = self.story.metadata
-        fallback=False
         # fall back default:
         if not template:
             template="${title}-${siteabbrev}_${storyId}${formatext}"
-            fallback=True
 
         # Add extension if not already included.
         if extension not in template:
             template+=extension
 
-        if fallback or self.getConfig('safe_filename'):
+        if not self.getConfig('allow_unsafe_filename'):
             values={}
             pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+")
             for k in self.story.metadata.keys():
@@ -99,13 +123,17 @@ class BaseStoryWriter(Configurable):
         wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
 
         for entry in titleEntriesList:
-            if entry in self.titleLabels:
+            if entry in self.validEntries:
                 if self.story.getMetadata(entry):
                     if entry in wideTitleEntriesList:
                         TEMPLATE=WIDE_ENTRY
                     else:
                         TEMPLATE=ENTRY
-                    self._write(out,TEMPLATE.substitute({'label':self.titleLabels[entry],
+                    if self.getConfigList(entry):
+                        label=self.getConfig(entry+"_label")
+                    else:
+                        label=self.titleLabels[entry]
+                    self._write(out,TEMPLATE.substitute({'label':label,
                                                          'value':self.story.getMetadata(entry)}))
 
         self._write(out,END.substitute(self.story.metadata))
@@ -129,7 +157,7 @@ class BaseStoryWriter(Configurable):
     def writeStory(self,outstream=None):
         self.addConfigSection(self.story.getMetadata('site'))
         self.addConfigSection(self.story.getMetadata('site')+":"+self.getFormatName())
-        self.addConfigSection("commandline")
+        self.addConfigSection("overrides")
 
         for tag in self.getConfigList("extratags"):
             self.story.addToList("extratags",tag)

diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index b4c1cd91..d50a5de6 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -242,7 +242,7 @@ h6 { text-align: center; }
 
         # set to avoid duplicate subject tags.
         subjectset = set()
-        for entry in self.titleLabels.keys():
+        for entry in self.validEntries:
             if entry in self.getConfigList("include_subject_tags") and \
                     entry not in self.story.getLists() and \
                     self.story.getMetadata(entry):

diff --git a/ffstorage.py b/ffstorage.py
index dae09352..a196487b 100644
--- a/ffstorage.py
+++ b/ffstorage.py
@@ -20,4 +20,4 @@ class DownloadData(db.Model):
 
 class UserConfig(db.Model):
     user = db.UserProperty()
-    config = db.TextProperty()
+    config = db.BlobProperty()
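Unless allow_unsafe_filename is set, getFileName() now scrubs every metadata value with the character class shown above before substituting it into the filename template. In isolation, the scrub amounts to collapsing each run of disallowed characters (a sketch; the patch only shows the compiled pattern, so the underscore replacement is an assumption about the surrounding code):

    import re

    SAFE = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+")

    def safe_filename(value):
        # squash any run of disallowed characters to a single underscore
        return SAFE.sub('_', value)

    print safe_filename(u'Who? Me: a "Story"!')   # -> Who_ Me_ a _Story_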

diff --git a/index.html b/index.html
index 940a5cad..e7574a52 100644
--- a/index.html
+++ b/index.html
@@ -56,10 +56,10 @@ This version is a new re-org/re-write of the code.
 
-So far, only a few sites are supported: fanfiction.net, twilighted.net and whofic.com.
+So far, the only sites supported are: fanfiction.net, fictionalley.com, ficwad.com, twilighted.net and whofic.com.
 
-Login/Password is only asked for when required now.
+Login/Password is asked for when required now.
 
 Mobi support (for Kindle) is only via EPub conversion in this version.
@@ -77,7 +77,7 @@
 
 EPub
 HTML
-Plain Text
+Plain Text
 
 For Mobi (Kindle) select EPub and use the Convert link when it's finished.
 
@@ -133,8 +133,9 @@
 ficwad.com
-Use the URL of any story chapter, such as
-http://www.ficwad.com/story/75246.
+Use the URL of the story's chapter list, such as
+http://www.ficwad.com/story/74884.
+Note that this is changed from the previous version.  The system will still accept chapter URLs, however.
 harrypotterfanfiction.com

diff --git a/main.py b/main.py
index 9bf69855..5d5f1d94 100644
--- a/main.py
+++ b/main.py
@@ -115,11 +115,16 @@ class EditConfigServer(webapp.RequestHandler):
             if uconfig is not None and uconfig.config:
                 config = uconfig.config
             else:
-                template_values['default'] = True
-                configfile = open("defaults.ini","rb")
+                configfile = open("example.ini","rb")
                 config = configfile.read()
                 configfile.close()
             template_values['config'] = config
+
+        configfile = open("defaults.ini","rb")
+        config = configfile.read()
+        configfile.close()
+        template_values['defaultsini'] = config
+
         path = os.path.join(os.path.dirname(__file__), 'editconfig.html')
         self.response.headers['Content-Type'] = 'text/html'
         self.response.out.write(template.render(path, template_values))
@@ -227,6 +232,9 @@ class UserConfigServer(webapp.RequestHandler):
     def getUserConfig(self,user):
         config = ConfigParser.SafeConfigParser()
 
+        logging.debug('reading defaults.ini config file')
+        config.read('defaults.ini')
+
         ## Pull user's config record.
         l = UserConfig.all().filter('user =', user).fetch(1)
         ## TEST THIS
@@ -234,9 +242,6 @@ class UserConfigServer(webapp.RequestHandler):
             uconfig=l[0]
             logging.debug('reading config from UserConfig(%s)'%uconfig.config)
             config.readfp(StringIO.StringIO(uconfig.config))
-        else:
-            logging.debug('reading defaults.ini config file')
-            config.read('defaults.ini')
 
         return config

diff --git a/newdownload.py b/newdownload.py
index 1a66a523..d174cd50 100644
--- a/newdownload.py
+++ b/newdownload.py
@@ -41,12 +41,15 @@ def main():
     config.read('defaults.ini')
     logging.debug('reading personal.ini config file, if present')
     config.read('personal.ini')
-
-    config.add_section("commandline")
+
+    try:
+        config.add_section("overrides")
+    except ConfigParser.DuplicateSectionError:
+        pass
     if options.options:
         for opt in options.options:
             (var,val) = opt.split('=')
-            config.set("commandline",var,val)
+            config.set("overrides",var,val)
@@ -62,9 +65,11 @@ def main():
             adapter.getStoryMetadataOnly()
 
         if options.metaonly:
-            adapter.getStoryMetadataOnly()
+            print adapter.getStoryMetadataOnly()
             return
 
         ## XXX Use format.
+        ## XXX Doing all three formats actually causes some interesting
+        ## XXX config issues with format-specific sections.
         print "format: %s" % options.format
         writeStory(config,adapter,"epub")

From 36d4b9afcf7ea28fb9e6b773c9e3f9b0b6cf2c44 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Mon, 9 May 2011 14:46:05 -0500
Subject: [PATCH 145/482] Add adastrafanfic.com support and support to confirm
 adult w/o login/pass.

---
 defaults.ini                                 |   6 +
 example.ini                                  |   5 +
 .../adapters/adapter_adastrafanficcom.py     | 173 ++++++++++++++++++
 fanficdownloader/adapters/adapter_test1.py   |   7 +
 .../adapters/adapter_twilightednet.py        |   2 +-
 fanficdownloader/exceptions.py               |   7 +
 login.html                                   |  16 +-
 main.py                                      |  17 +-
 newdownload.py                               |  11 +-
 9 files changed, 235 insertions(+), 9 deletions(-)
 create mode 100644 fanficdownloader/adapters/adapter_adastrafanficcom.py

diff --git a/defaults.ini b/defaults.ini
index d825058a..307ce2c9 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -148,6 +148,12 @@
 [www.whofic.com]
 
+[www.adastrafanfic.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content.  In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
 [overrides]
 ## It may sometimes be useful to override all of the specific format,
 ## site and site:format sections in your private configuration.  For

diff --git a/example.ini b/example.ini
index 81f2042b..cf63ac6b 100644
--- a/example.ini
+++ b/example.ini
@@ -11,6 +11,11 @@
 #username:YourUsername
 #password:YourPassword
 
+[www.adastrafanfic.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content.
+#is_adult:true
+
 ## The [defaults] section here will override the system [defaults],
 ## but not format, site for site:format sections.
 [defaults]

diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py
new file mode 100644
index 00000000..119ef626
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+import time
+import datetime
+import logging
+import re
+import urllib
+import urllib2
+
+import fanficdownloader.BeautifulSoup as bs
+from fanficdownloader.htmlcleanup import stripHTML
+import fanficdownloader.exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup
+
+class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','aaff')
+        self.decode = "utf8"
+        self.story.addToList("category","Star Trek")
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.adastrafanfic.com'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return [cls.getSiteDomain()]
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            addurl = "&warning=5"
+        else:
+            addurl=""
+
+        url = self.url+'&index=1'+addurl
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
+            raise exceptions.AdultCheckRequired(self.url)
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ##
+        ## Summary, strangely, is in the content attr of a <meta name="description"> tag
+        ## which is escaped HTML.  Unfortunately, we can't use it because they don't
+        ## escape (') chars in the desc, breaking the tag.
+        #meta_desc = soup.find('meta',{'name':'description'})
+        #metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
+        #self.story.setMetadata('description',stripHTML(metasoup))
+
+        def defaultGetattr(d,k):
+            try:
+                return d[k]
+            except:
+                return ""
+
+        # Rated: NC-17 etc
+        labels = soup.findAll('span',{'class':'label'})
+        for labelspan in labels:
+            value = labelspan.nextSibling
+            label = labelspan.string
+
+            if 'Summary' in label:
+                ## Everything until the next span class='label'
+                svalue = str(value)
+                while not defaultGetattr(value,'class') == 'label':
+                    svalue += str(value)
+                    value = value.nextSibling
+                self.story.setMetadata('description',stripHTML(svalue))
+
+            if 'Rated' in label:
+                self.story.setMetadata('rating', value)
+
+            if 'Word count' in label:
+                self.story.setMetadata('numWords', value)
+
+            if 'Categories' in label:
+                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                catstext = [cat.string for cat in cats]
+                for cat in catstext:
+                    self.story.addToList('category',cat.string)
+
+            if 'Genre' in label:
+                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genrestext = [genre.string for genre in genres]
+                self.genre = ', '.join(genrestext)
+                for genre in genrestext:
+                    self.story.addToList('genre',genre.string)
+
+            if 'Warnings' in label:
+                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warningstext = [warning.string for warning in warnings]
+                self.warning = ', '.join(warningstext)
+                for warning in warningstext:
+                    self.story.addToList('warnings',warning.string)
+
+            if 'Completed' in label:
+                if 'Yes' in value:
+                    self.story.setMetadata('status', 'Completed')
+                else:
+                    self.story.setMetadata('status', 'In-Progress')
+
+            if 'Published' in label:
+                self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%m/%d/%Y"))))
+
+            if 'Updated' in label:
+                # there's a stray [ at the end.
+                #value = value[0:-1]
+                self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%m/%d/%Y"))))
+
+
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        span = soup.find('div', {'id' : 'story'})
+
+        if None == span:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)
+
+        return utf8FromSoup(span)
+
+def getClass():
+    return AdAstraFanficComSiteAdapter

diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index 47f00dbc..d2cb6457 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import datetime
+import logging
 
 import fanficdownloader.BeautifulSoup as bs
 import fanficdownloader.exceptions as exceptions
@@ -16,6 +17,7 @@ class TestSiteAdapter(BaseSiteAdapter):
         # get storyId from url--url validation guarantees query is only sid=1234
         self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
         self.username=''
+        self.is_adult=False
 
     @staticmethod
     def getSiteDomain():
@@ -29,6 +31,10 @@ class TestSiteAdapter(BaseSiteAdapter):
 
     def extractChapterUrlsAndMetadata(self):
 
+        if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")):
+            logging.warn("self.is_adult:%s"%self.is_adult)
+            raise exceptions.AdultCheckRequired(self.url)
+
         if self.story.getMetadata('storyId') == '666':
             raise exceptions.StoryDoesNotExist(self.url)
 
@@ -86,6 +92,7 @@ Some more longer description.  "I suck at summaries!"  "Better than it sounds!"

    Prologue

This is a fake adapter for testing purposes. Different storyIds will give different errors:

    +

    http://test1.com?sid=665 - raises AdultCheckRequired

    http://test1.com?sid=666 - raises StoryDoesNotExist

    http://test1.com?sid=667 - raises FailedToDownload on chapter 1

    http://test1.com?sid=668 - raises FailedToLogin unless username='Me'
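Taken together, the sid hooks above let a driver script exercise every failure mode without touching a live site. A minimal sketch of such a driver, assuming an adapters.getAdapter(config,url) factory and an empty ConfigParser config (neither is shown in this diff):

    # Hypothetical driver for the test adapter's error paths; only the
    # exception classes and the sid behaviors come from this patch series.
    import ConfigParser
    from fanficdownloader import adapters, exceptions

    config = ConfigParser.SafeConfigParser()

    def fetch_meta(url):
        adapter = adapters.getAdapter(config, url)
        try:
            return adapter.getStoryMetadataOnly()
        except exceptions.AdultCheckRequired:
            adapter.is_adult = True    # sid=665: retry with the adult flag set
            return adapter.getStoryMetadataOnly()
        except exceptions.FailedToLogin:
            adapter.username = 'Me'    # sid=668: only username 'Me' passes
            adapter.password = 'anything'
            return adapter.getStoryMetadataOnly()

    print fetch_meta('http://test1.com?sid=665')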

    diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index 3a8e14c8..6a654e46 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -166,7 +166,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): # genrestext = [genre.string for genre in genres] # self.genre = ', '.join(genrestext) # for genre in genrestext: - # self.addSubject(genre.string) + # self.story.addToList('genre',genre.string) if 'Completed' in label: if 'Yes' in value: diff --git a/fanficdownloader/exceptions.py b/fanficdownloader/exceptions.py index 44cae238..4b316442 100644 --- a/fanficdownloader/exceptions.py +++ b/fanficdownloader/exceptions.py @@ -24,6 +24,13 @@ class FailedToLogin(Exception): def __str__(self): return "Failed to Login for URL: (%s) with username: (%s)" % (self.url, self.username) +class AdultCheckRequired(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story requires confirmation of adult status: (%s)" % self.url + class StoryDoesNotExist(Exception): def __init__(self,url): self.url=url diff --git a/login.html b/login.html index e54141cd..2d1240f8 100644 --- a/login.html +++ b/login.html @@ -45,15 +45,18 @@
    {% endif %}
    + +
    + + {% if login %} +

    Login and Password

    {{ site }} requires a Login/Password for this story. You need to provide your Login/Password for {{ site }} to download it.
    - -
    Login
    @@ -63,6 +66,15 @@
    Password
    + + {% else %} + +
    +
    Are you an Adult?
    +
    + + {% endif %} +
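The template branch above is the user-facing half of the new exception; the adapter-side half gates on two flags, an explicit adapter.is_adult set by the caller or an is_adult setting from config, exactly as the test adapter does. A minimal sketch of that gate (the looks_adult test is illustrative; each real site detects mature content its own way):

    import fanficdownloader.exceptions as exceptions

    def adult_gate(adapter, looks_adult):
        # Two-flag check as used in extractChapterUrlsAndMetadata():
        # a per-request flag or a persistent config setting.
        if looks_adult and not (adapter.is_adult or adapter.getConfig("is_adult")):
            # The web handler and the CLI catch this, ask "Are you an
            # Adult?", then retry with adapter.is_adult = True.
            raise exceptions.AdultCheckRequired(adapter.url)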
    diff --git a/main.py b/main.py index 5d5f1d94..39d84ea2 100644 --- a/main.py +++ b/main.py @@ -240,7 +240,7 @@ class UserConfigServer(webapp.RequestHandler): ## TEST THIS if l and l[0].config: uconfig=l[0] - logging.debug('reading config from UserConfig(%s)'%uconfig.config) + #logging.debug('reading config from UserConfig(%s)'%uconfig.config) config.readfp(StringIO.StringIO(uconfig.config)) return config @@ -260,6 +260,7 @@ class FanfictionDownloader(UserConfigServer): url = self.request.get('url') login = self.request.get('login') password = self.request.get('password') + is_adult = self.request.get('is_adult') == "on" logging.info("Queuing Download: " + url) @@ -287,6 +288,7 @@ class FanfictionDownloader(UserConfigServer): if len(login) > 1: adapter.username=login adapter.password=password + adapter.is_adult=is_adult ## This scrapes the metadata, which will be ## duplicated in the queue task, but it ## detects bad URLs, bad login, bad story, etc @@ -304,20 +306,23 @@ class FanfictionDownloader(UserConfigServer): 'url':url, 'login':login, 'password':password, - 'user':user.email()}) + 'user':user.email(), + 'is_adult':is_adult}) logging.info("enqueued download key: " + str(download.key())) - except exceptions.FailedToLogin, e: + except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: logging.exception(e) download.failure = str(e) download.put() logging.debug('Need to Login, display log in page') + login= ( e is exceptions.FailedToLogin ) template_values = dict(nickname = user.nickname(), url = url, format = format, site = adapter.getSiteDomain(), - fic = download + fic = download, + login=login, ) path = os.path.join(os.path.dirname(__file__), 'login.html') self.response.out.write(template.render(path, template_values)) @@ -348,6 +353,7 @@ class FanfictionDownloaderTask(UserConfigServer): url = self.request.get('url') login = self.request.get('login') password = self.request.get('password') + is_adult = self.request.get('is_adult') # User object can't pass, just email address user = users.User(self.request.get('user')) @@ -381,11 +387,12 @@ class FanfictionDownloaderTask(UserConfigServer): download.put() return - logging.info('Created an adaper: %s' % adapter) + logging.info('Created an adapter: %s' % adapter) if len(login) > 1: adapter.username=login adapter.password=password + adapter.is_adult=is_adult try: # adapter.getStory() is what does all the heavy lifting. diff --git a/newdownload.py b/newdownload.py index d174cd50..46dc73dd 100644 --- a/newdownload.py +++ b/newdownload.py @@ -7,6 +7,10 @@ import sys, os from optparse import OptionParser import getpass +if sys.version_info < (2, 5): + print "This program requires Python 2.5 or newer." + sys.exit(1) + from fanficdownloader import adapters,writers,exceptions import ConfigParser @@ -56,13 +60,18 @@ def main(): try: adapter.getStoryMetadataOnly() - except exceptions.FailedToLogin, ftl: + except exceptions.FailedToLogin: print "Login Failed, Need Username/Password." sys.stdout.write("Username: ") adapter.username = sys.stdin.readline().strip() adapter.password = getpass.getpass(prompt='Password: ') #print("Login: `%s`, Password: `%s`" % (adapter.username, adapter.password)) adapter.getStoryMetadataOnly() + except exceptions.AdultCheckRequired: + print "Please confirm you are an adult in your locale: (y/n)?" 
+ if sys.stdin.readline().strip().lower().startswith('y'): + adapter.is_adult=True + adapter.getStoryMetadataOnly() if options.metaonly: print adapter.getStoryMetadataOnly() From 9b2450a609654b00cf1d63469d6915cb73579eed Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 9 May 2011 21:20:06 -0500 Subject: [PATCH 146/482] Support for twiwrite.net, potionsandsnitches.com & harrypotterfanfiction.com. --- .../adapters/adapter_adastrafanficcom.py | 2 +- .../adapters/adapter_ficwadcom.py | 16 +- .../adapter_harrypotterfanfictioncom.py | 180 +++++++++++++++ .../adapters/adapter_potionsandsnitchesnet.py | 168 ++++++++++++++ .../adapters/adapter_twiwritenet.py | 208 ++++++++++++++++++ main.py | 2 +- 6 files changed, 566 insertions(+), 10 deletions(-) create mode 100644 fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py create mode 100644 fanficdownloader/adapters/adapter_potionsandsnitchesnet.py create mode 100644 fanficdownloader/adapters/adapter_twiwritenet.py diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py index 119ef626..3f38079c 100644 --- a/fanficdownloader/adapters/adapter_adastrafanficcom.py +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -107,7 +107,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): if 'Summary' in label: ## Everything until the next span class='label' - svalue = str(value) + svalue = '' while not defaultGetattr(value,'class') == 'label': svalue += str(value) value = value.nextSibling diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index 475d942a..27324a02 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -129,36 +129,36 @@ class FicwadComSiteAdapter(BaseSiteAdapter): ## perhaps not the most efficient way to parse this, using ## regexps for each rather than something more complex, but ## IMO, it's more readable and amenable to change. - metapstr = stripHTML(str(metap)).replace('\n',' ').replace('\t','') - #print "metap: (%s)"%metapstr + metastr = stripHTML(str(metap)).replace('\n',' ').replace('\t','') + #print "metap: (%s)"%metastr - m = re.match(r".*?Rating: (.+?) -.*?",metapstr) + m = re.match(r".*?Rating: (.+?) -.*?",metastr) if m: self.story.setMetadata('rating', m.group(1)) - m = re.match(r".*?Genres: (.+?) -.*?",metapstr) + m = re.match(r".*?Genres: (.+?) -.*?",metastr) if m: for g in m.group(1).split(','): self.story.addToList('genre',g) - m = re.match(r".*?Published: ([0-9/]+?) -.*?",metapstr) + m = re.match(r".*?Published: ([0-9/]+?) -.*?",metastr) if m: self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(\ time.mktime(time.strptime(m.group(1), "%Y/%m/%d")))) # Updated can have more than one space after it. - m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metapstr) + m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metastr) if m: self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(\ time.mktime(time.strptime(m.group(1), "%Y/%m/%d")))) - m = re.match(r".*? - ([0-9/]+?) words.*?",metapstr) + m = re.match(r".*? - ([0-9/]+?) 
words.*?",metastr) if m: self.story.setMetadata('numWords',m.group(1)) - if metapstr.endswith("Complete"): + if metastr.endswith("Complete"): self.story.setMetadata('status', 'Completed') else: self.story.setMetadata('status', 'In-Progress') diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py new file mode 100644 index 00000000..2efd2720 --- /dev/null +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- + +import time +import datetime +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup + +class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','hp') + self.decode = "ISO-8859-1" + self.story.addToList("category","Harry Potter") + self.is_adult=False + + # get storyId from url--url validation guarantees query is only psid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.harrypotterfanfiction.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.harrypotterfanfiction.com','harrypotterfanfiction.com'] + + def getSiteExampleURLs(self): + return "http://www.harrypotterfanfiction.com/viewstory.php?psid=1234 http://harrypotterfanfiction.com/viewstory.php?psid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only.' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database." in data: + return True + else: + return False + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId'))) + #print "title a:%s"%a + self.story.setMetadata('title',a.string) + ## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995' + if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")): + raise exceptions.AdultCheckRequired(self.url) + + + # Find authorid and URL from... author url. 
+ a = soup.find('a', href=re.compile(r"viewuser.php\?showuid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + ## hpcom doesn't give us total words--but it does give + ## us words/chapter. I'd rather add than fetch and + ## parse another page. + words=0 + for tr in soup.find('table',{'class':'text'}).findAll('tr'): + tdstr = tr.findAll('td')[2].string + if tdstr and tdstr.isdigit(): + words+=int(tdstr) + self.story.setMetadata('numWords',str(words)) + + # Find the chapters: + tablelist = soup.find('table',{'class':'text'}) + for chapter in tablelist.findAll('a', href=re.compile(r'\?chapterid=\d+')): + #javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?chapterid=433441&i=1' + # just in case there's tags, like in chapter titles. + chpt=re.sub(r'^.*?(\?chapterid=433441).*?',r'\1',chapter['href']) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php'+chpt)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Finding the metadata is a bit of a pain. Desc is the only thing this color. + desctable= soup.find('table',{'bgcolor':'#f0e8e8'}) + self.story.setMetadata('description',stripHTML(desctable)) + + ## Finding the metadata is a bit of a pain. Most of the meta + ## data is in a center.table without a bgcolor. + for center in soup.findAll('center'): + table = center.find('table',{'bgcolor':None}) + if table: + metastr = stripHTML(str(table)).replace('\n',' ').replace('\t',' ') + # Rating: 12+ Story Reviews: 3 + # Chapters: 3 + # Characters: Andromeda, Ted, Bellatrix, R. Lestrange, Lucius, Narcissa, OC + # Genre(s): Fluff, Romance, Young Adult Era: OtherPairings: Other Pairing, Lucius/Narcissa + # Status: Completed + # First Published: 2010.09.02 + # Last Published Chapter: 2010.09.28 + # Last Updated: 2010.09.28 + # Favorite Story Of: 1 users + # Warnings: Scenes of a Mild Sexual Nature + + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + m = re.match(r".*?Rating: (.+?) Story Reviews.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genre\(s\): (.+?) Era.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Warnings: (.+).*?",metastr) + if m: + for w in m.group(1).split(','): + if w != 'Now Warnings': + self.story.addToList('warnings',w) + + m = re.match(r".*?First Published: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('datePublished', + datetime.datetime.fromtimestamp(\ + time.mktime(time.strptime(m.group(1), "%Y.%m.%d")))) + + # Updated can have more than one space after it. + m = re.match(r".*?Last Updated: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('dateUpdated', + datetime.datetime.fromtimestamp(\ + time.mktime(time.strptime(m.group(1), "%Y.%m.%d")))) + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'fluidtext'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return utf8FromSoup(span) + +def getClass(): + return HarryPotterFanFictionComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py new file mode 100644 index 00000000..197f2336 --- /dev/null +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- + +import time +import datetime +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup + +class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','pns') + self.decode = "utf8" + self.story.addToList("category","Harry Potter") + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.potionsandsnitches.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.potionsandsnitches.net','potionsandsnitches.net'] + + def getSiteExampleURLs(self): + return "http://www.potionsandsnitches.net/fanfiction/viewstory.php?sid=1234 http://potionsandsnitches.net/fanfiction/viewstory.php?sid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("potionsandsnitches.net/fanfiction/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only.' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database." in data: + return True + else: + return False + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/fanfiction/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfiction/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## + ## Summary, strangely, is in the content attr of a <meta> tag + ## which is escaped HTML. Unfortunately, we can't use it because they don't + ## escape (') chars in the desc, breaking the tag. + #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next div class='listbox' + svalue = "" + while not defaultGetattr(value,'class') == 'listbox': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%b %d %Y")))) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%b %d %Y")))) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return PotionsAndSnitchesNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py new file mode 100644 index 00000000..18b83b02 --- /dev/null +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- + +import time +import datetime +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup + +class TwiwriteNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twrt') + self.decode = "ISO-8859-1" + self.story.addToList("category","Twilight") + self.username = "NoneGiven" # if left empty, twiwrite.net doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.twiwrite.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.twiwrite.net','twiwrite.net'] + + def getSiteExampleURLs(self): + return "http://www.twiwrite.net/viewstory.php?sid=1234 http://twiwrite.net/viewstory.php?sid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("twiwrite.net/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database." in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## + ## Summary, strangely, is in the content attr of a <meta> tag + ## which is escaped HTML. Unfortunately, we can't use it because they don't + ## escape (') chars in the desc, breaking the tag. + #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=8')) + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warning',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y")))) + + if 'Updated' in label: + # there's a stray [ at the end. + value = value[0:-1] + self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y")))) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TwiwriteNetSiteAdapter + diff --git a/main.py b/main.py index 39d84ea2..f34a8256 100644 --- a/main.py +++ b/main.py @@ -316,7 +316,7 @@ class FanfictionDownloader(UserConfigServer): download.failure = str(e) download.put() logging.debug('Need to Login, display log in page') - login= ( e is exceptions.FailedToLogin ) + login= ( isinstance(e, exceptions.FailedToLogin) ) template_values = dict(nickname = user.nickname(), url = url, format = format, From 6d2f6577292e9ff738b884d1b6935662e77a3e74 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 10 May 2011 13:26:34 -0500 Subject: [PATCH 147/482] Support for thewriterscoffeeshop.com, allow both login/pass and Are Adult in one site. 
--- .../adapter_harrypotterfanfictioncom.py | 4 +- .../adapters/adapter_potionsandsnitchesnet.py | 4 +- fanficdownloader/adapters/adapter_test1.py | 8 +- .../adapter_thewriterscoffeeshopcom.py | 201 ++++++++++++++++++ .../adapters/adapter_twilightednet.py | 14 +- .../adapters/adapter_twiwritenet.py | 2 +- index.html | 9 +- login.html | 4 +- main.py | 9 +- 9 files changed, 232 insertions(+), 23 deletions(-) create mode 100644 fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py index 2efd2720..4f3de5e9 100644 --- a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -45,9 +45,9 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): return re.escape("http://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$" def needToLoginCheck(self, data): - if 'Registered Users Only.' in data \ + if 'Registered Users Only' in data \ or 'There is no such account on our website' in data \ - or "That password doesn't match the one in our database." in data: + or "That password doesn't match the one in our database" in data: return True else: return False diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py index 197f2336..7fb7e637 100644 --- a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -44,9 +44,9 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): return re.escape("http://")+r"(www\.)?"+re.escape("potionsandsnitches.net/fanfiction/viewstory.php?sid=")+r"\d+$" def needToLoginCheck(self, data): - if 'Registered Users Only.' in data \ + if 'Registered Users Only' in data \ or 'There is no such account on our website' in data \ - or "That password doesn't match the one in our database." in data: + or "That password doesn't match the one in our database" in data: return True else: return False diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index d2cb6457..c6d925eb 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -43,8 +43,11 @@ class TestSiteAdapter(BaseSiteAdapter): if self.story.getMetadata('storyId') == '668' and self.username != "Me" : raise exceptions.FailedToLogin(self.url,self.username) - - self.story.setMetadata(u'title',"Test Story Title "+self.crazystring) + + if self.story.getMetadata('storyId') == '664': + self.story.setMetadata(u'title',"Test Story Title "+self.crazystring) + else: + self.story.setMetadata(u'title',"Test Story Title") self.story.setMetadata('storyUrl',self.url) self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done @@ -92,6 +95,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"

    Prologue

This is a fake adapter for testing purposes. Different storyIds will give different errors:

    +

    http://test1.com?sid=664 - Crazy string title

    http://test1.com?sid=665 - raises AdultCheckRequired

    http://test1.com?sid=666 - raises StoryDoesNotExist

    http://test1.com?sid=667 - raises FailedToDownload on chapter 1

    diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py new file mode 100644 index 00000000..18cc9d1b --- /dev/null +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- + +import time +import datetime +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup + +class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twcs') + self.decode = "ISO-8859-1" + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.thewriterscoffeeshop.com' + + @classmethod + def getAcceptDomains(cls): + return [cls.getSiteDomain()] + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/library/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/library/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/library/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/library/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
    etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%B %d, %Y")))) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%B %d, %Y")))) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TheWritersCoffeeShopComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index 6a654e46..ec1b5140 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -20,7 +20,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata('siteabbrev','tw') self.decode = "utf8" self.story.addToList("category","Twilight") - self.username = "NoneGiven" # if left empty, twilighted.net doesn't return any message at all. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. self.password = "" # get storyId from url--url validation guarantees query is only sid=1234 @@ -46,9 +46,9 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): return re.escape("http://")+r"(www\.)?"+re.escape("twilighted.net/viewstory.php?sid=")+r"\d+$" def needToLoginCheck(self, data): - if 'Registered Users Only.' in data \ + if 'Registered Users Only' in data \ or 'There is no such account on our website' in data \ - or "That password doesn't match the one in our database." in data: + or "That password doesn't match the one in our database" in data: return True else: return False @@ -120,14 +120,6 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata('numChapters',len(self.chapterUrls)) - ## - ## Summary, strangely, is in the content attr of a tag - ## which is escaped HTML. 
Unfortunately, we can't use it because they don't - ## escape (') chars in the desc, breakin the tag. - #meta_desc = soup.find('meta',{'name':'description'}) - #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) - #self.story.setMetadata('description',stripHTML(metasoup)) - def defaultGetattr(d,k): try: return d[k] diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py index 18b83b02..c9174fda 100644 --- a/fanficdownloader/adapters/adapter_twiwritenet.py +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -48,7 +48,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): def needToLoginCheck(self, data): if 'Registered Users Only' in data \ or 'There is no such account on our website' in data \ - or "That password doesn't match the one in our database." in data: + or "That password doesn't match the one in our database" in data: return True else: return False diff --git a/index.html b/index.html index e7574a52..0376cfa8 100644 --- a/index.html +++ b/index.html @@ -56,10 +56,10 @@ This version is a new re-org/re-write of the code.

- So far, the only sites supported are: fanfiction.net, fictionalley.com, ficwad.com, twilighted.net and whofic.com. + fictionalley.org and mediaminer.org aren't supported yet.

    - Login/Password is asked for when required now. + Login/Password is asked for when required now, as is 'Are you an Adult?' where required.

    Mobi support (for Kindle) is only via EPub conversion in this version. @@ -164,6 +164,11 @@ Use the URL of the story's chapter list, such as
    http://www.whofic.com/viewstory.php?sid=16334.

    +
    thewriterscoffeeshop.com
    +
    + Use the URL of the story's chapter list, such as +
    http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110. +
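Note that the new adapter accepts only this exact URL shape; anything after the sid is rejected by getSiteURLPattern(). A quick check, with the pattern expression copied from the adapter above and illustrative test URLs:

    import re

    # Pattern as built in TheWritersCoffeeShopComSiteAdapter.getSiteURLPattern().
    pattern = re.escape("http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=") + r"\d+$"

    print bool(re.match(pattern, "http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110"))     # True
    print bool(re.match(pattern, "http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110&i=1")) # False: extra query parts are rejected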
    diff --git a/login.html b/login.html index 2d1240f8..a36d9193 100644 --- a/login.html +++ b/login.html @@ -49,7 +49,7 @@
    - {% if login %} + {% if is_login %}

    Login and Password

    @@ -69,6 +69,8 @@ {% else %} + +
    Are you an Adult?
    diff --git a/main.py b/main.py index f34a8256..c93c9afb 100644 --- a/main.py +++ b/main.py @@ -316,14 +316,19 @@ class FanfictionDownloader(UserConfigServer): download.failure = str(e) download.put() logging.debug('Need to Login, display log in page') - login= ( isinstance(e, exceptions.FailedToLogin) ) + is_login= ( isinstance(e, exceptions.FailedToLogin) ) template_values = dict(nickname = user.nickname(), url = url, format = format, site = adapter.getSiteDomain(), fic = download, - login=login, + is_login=is_login, ) + # thewriterscoffeeshop.com can do adult check *and* user required. + if isinstance(e,exceptions.AdultCheckRequired): + template_values['login']=login + template_values['password']=password + path = os.path.join(os.path.dirname(__file__), 'login.html') self.response.out.write(template.render(path, template_values)) return From 756b6944fccd857043dd37bfb3d5f4eaa90e3bd6 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 11 May 2011 12:29:59 -0500 Subject: [PATCH 148/482] Support for mediaminer.org, update copyrights. --- defaults.ini | 20 +- editconfig.html | 2 +- fanficdownloader/adapters/__init__.py | 15 ++ .../adapters/adapter_adastrafanficcom.py | 15 ++ .../adapters/adapter_fanfictionnet.py | 15 ++ .../adapters/adapter_fictionpresscom.py | 15 ++ .../adapters/adapter_ficwadcom.py | 17 +- .../adapter_harrypotterfanfictioncom.py | 15 ++ .../adapters/adapter_mediaminerorg.py | 195 ++++++++++++++++++ .../adapters/adapter_potionsandsnitchesnet.py | 15 ++ fanficdownloader/adapters/adapter_test1.py | 15 ++ .../adapter_thewriterscoffeeshopcom.py | 15 ++ .../adapters/adapter_twilightednet.py | 15 ++ .../adapters/adapter_twiwritenet.py | 15 ++ .../adapters/adapter_whoficcom.py | 15 ++ fanficdownloader/adapters/base_adapter.py | 15 ++ fanficdownloader/configurable.py | 15 ++ fanficdownloader/exceptions.py | 17 ++ fanficdownloader/htmlcleanup.py | 15 ++ fanficdownloader/story.py | 15 ++ fanficdownloader/writers/__init__.py | 15 ++ fanficdownloader/writers/base_writer.py | 15 ++ fanficdownloader/writers/writer_epub.py | 15 ++ fanficdownloader/writers/writer_html.py | 15 ++ fanficdownloader/writers/writer_txt.py | 15 ++ ffstorage.py | 15 ++ index.html | 5 +- login.html | 2 +- main.py | 1 + newdownload.py | 15 ++ status.html | 2 +- utils/remover.py | 17 +- 32 files changed, 600 insertions(+), 8 deletions(-) create mode 100644 fanficdownloader/adapters/adapter_mediaminerorg.py diff --git a/defaults.ini b/defaults.ini index 307ce2c9..310008bc 100644 --- a/defaults.ini +++ b/defaults.ini @@ -1,3 +1,18 @@ +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + [defaults] ## [defaults] section applies to all formats and sites but may be @@ -99,7 +114,10 @@ zip_output: false ## entries to make epub subject tags ## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" include_subject_tags: extratags, genre, category, lastupdate, status -#include_tocpage: false + +## epub carries the TOC in metadata. 
+## mobi generated from epub will have a TOC at the end. +include_tocpage: false ## epub->mobi conversions typically don't like tables. titlepage_use_table: false diff --git a/editconfig.html b/editconfig.html index 942ad330..1eb369d3 100644 --- a/editconfig.html +++ b/editconfig.html @@ -68,7 +68,7 @@ alt="Powered by Google App Engine" />

    FanfictionLoader is a web front-end to fanficdownloader
    - Copyright © Roman Kirillov + Copyright © Fanficdownloader team
    diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index 63254b4d..8213de9e 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import os, sys, glob from os.path import dirname, basename, normpath import logging diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py index 3f38079c..adb4ac93 100644 --- a/fanficdownloader/adapters/adapter_adastrafanficcom.py +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index c0419953..3b96f547 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py index 051ade92..16d35e7e 100644 --- a/fanficdownloader/adapters/adapter_fictionpresscom.py +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index 27324a02..eea4a540 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging @@ -129,7 +144,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter): ## perhaps not the most efficient way to parse this, using ## regexps for each rather than something more complex, but ## IMO, it's more readable and amenable to change. - metastr = stripHTML(str(metap)).replace('\n',' ').replace('\t','') + metastr = stripHTML(str(metap)).replace('\n',' ').replace('\t',' ') #print "metap: (%s)"%metastr m = re.match(r".*?Rating: (.+?) -.*?",metastr) diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py index 4f3de5e9..937c4278 100644 --- a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py new file mode 100644 index 00000000..68c68d11 --- /dev/null +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import datetime
+import logging
+import re
+import urllib
+import urllib2
+
+import fanficdownloader.BeautifulSoup as bs
+from fanficdownloader.htmlcleanup import stripHTML
+import fanficdownloader.exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup
+
+class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','mm')
+        self.decode = "utf8"
+
+        # get storyId from url--url validation guarantees query correct
+        m = re.match(self.getSiteURLPattern(),url)
+        if m:
+            self.story.setMetadata('storyId',m.group('id'))
+            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            # normalized story URL.
+            self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId'))
+        else:
+            raise exceptions.InvalidStoryURL(url,
+                                             self.getSiteDomain(),
+                                             self.getSiteExampleURLs())
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.mediaminer.org'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return [cls.getSiteDomain()]
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/fanfic/view_st.php/123456 http://"+self.getSiteDomain()+"/fanfic/view_ch.php/1234123/123444#fic_c"
+
+    def getSiteURLPattern(self):
+        ## http://www.mediaminer.org/fanfic/view_st.php/76882
+        ## http://www.mediaminer.org/fanfic/view_ch.php/167618/594087#fic_c
+        return re.escape("http://"+self.getSiteDomain())+\
+            "/fanfic/view_(st|ch)\.php/"+r"(?P<id>\d+)(/\d+#fic_c)?$"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        url = self.url
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        title = soup.find('title').string
+        ## MediaMiner - Fan Fic: Par Tout Autre Nom
+        ## MediaMiner: Question and Answer ( One-Shot )
+        ## MediaMiner: Moaning to Wake the Dead ( Chapter 1 )
+        title = re.match(r'^MediaMiner(?: - Fan Fic)?:(.*?)(?: \( .*? \))?$',title).group(1)
+
+        # [ A - All Readers ], strip '[ ' ' ]'
+        rating = soup.find("font",{"class":"smtxt"}).string[1:-1]
+        self.story.setMetadata('title',title)
+        self.story.setMetadata('rating',rating)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"/fanfic/src.php/u/\d+"))
+        self.story.setMetadata('authorId',a['href'].split('/')[-1])
+        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # save date from first for later.
+        firstdate=None
+
+        # Find the chapters
+        select = soup.find('select',{'name':'cid'})
+        if not select:
+            self.chapterUrls.append((title,self.url))
+        else:
+            for option in select.findAll("option"):
+                chapter = stripHTML(option.string)
+                ## chapter can be: Chapter 7 [Jan 23, 2011]
+                ## or: Vigilant Moonlight ( Chapter 1 ) [Jan 30, 2004]
+                ## or even: Prologue ( Prologue ) [Jul 31, 2010]
+                m = re.match(r'^(.*?) (\( .*? \))? \[(.*?)\]$',chapter)
+                chapter = m.group(1)
+                # save date from first for later.
+                if not firstdate:
+                    firstdate = m.group(3)
+                self.chapterUrls.append((chapter,'http://'+self.host+'/fanfic/view_ch.php/'+self.story.getMetadata('storyId')+'/'+option['value']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        # category
+        # Ranma 1/2
+        for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/a/")):
+            self.story.addToList('category',a.string)
+
+        # genre
+        # Ranma 1/2
+        for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")):
+            self.story.addToList('genre',a.string)
+
+        # if firstdate, then the block below will only have last updated.
+        if firstdate:
+            self.story.setMetadata('datePublished',
+                                   datetime.datetime.fromtimestamp(time.mktime(time.strptime(firstdate, "%b %d, %Y"))))
+        # Everything else is in <tr bgcolor="#EEEED4">
    + + metastr = stripHTML(soup.find("tr",{"bgcolor":"#EEEED4"})).replace('\n',' ').replace('\r',' ').replace('\t',' ') + print metastr + # Latest Revision: August 03, 2010 + m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d)",metastr) + if m: + self.story.setMetadata('dateUpdated', + datetime.datetime.fromtimestamp(time.mktime(time.strptime(m.group(1), "%B %d, %Y")))) + if not firstdate: + self.story.setMetadata('datePublished', + self.story.getMetadataRaw('dateUpdated')) + + else: + self.story.setMetadata('dateUpdated', + self.story.getMetadataRaw('datePublished')) + + # Words: 123456 + m = re.match(r".*?\| Words: (\d+) \|",metastr) + if m: + self.story.setMetadata('numWords', m.group(1)) + + # Summary: .... + m = re.match(r".*?Summary: (.*)$",metastr) + if m: + self.story.setMetadata('description', m.group(1)) + + # completed + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + return + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + anchor = soup.find('a',{'name':'fic_c'}) + + if None == anchor: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + for div in anchor.findAllNext('div',{'align':'left'}): + div.name='p' # convert to

    mediaminer uses div with a + # margin for paragraphs. + anchor.append(div) # cheat! stuff all the content divs + # into anchor just as a holder. + + return utf8FromSoup(anchor) + +def getClass(): + return MediaMinerOrgSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py index 7fb7e637..aa7d074b 100644 --- a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index c6d925eb..369de457 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import datetime import logging diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py index 18cc9d1b..4a6cbc43 100644 --- a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index ec1b5140..eb79ee47 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py index c9174fda..d3e3f67b 100644 --- a/fanficdownloader/adapters/adapter_twiwritenet.py +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py index b25a9785..89a35b0c 100644 --- a/fanficdownloader/adapters/adapter_whoficcom.py +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import time import datetime import logging diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index a4fb7290..f44e35b7 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import re import datetime import time diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py index 2825b035..bc27a82f 100644 --- a/fanficdownloader/configurable.py +++ b/fanficdownloader/configurable.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import ConfigParser # All of the writers(epub,html,txt) and adapters(ffnet,twlt,etc) diff --git a/fanficdownloader/exceptions.py b/fanficdownloader/exceptions.py index 4b316442..cf8e558e 100644 --- a/fanficdownloader/exceptions.py +++ b/fanficdownloader/exceptions.py @@ -1,3 +1,20 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + ## A few exceptions for different things for adapters class FailedToDownload(Exception): diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py index 8ff3722a..7e91f190 100644 --- a/fanficdownloader/htmlcleanup.py +++ b/fanficdownloader/htmlcleanup.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import re def _unirepl(match): diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 4c7c0c81..55d31836 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + from htmlcleanup import conditionalRemoveEntities class Story: diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py index 28575e0d..19b4f42e 100644 --- a/fanficdownloader/writers/__init__.py +++ b/fanficdownloader/writers/__init__.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + ## This could (should?) use a dynamic loader like adapters, but for ## now, it's static, since there's so few of them. diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 64dcd4a7..fc014507 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import re import os.path import string diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index d50a5de6..7a23ce97 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import logging import string import StringIO diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py index 378fb5b7..b8a60f1c 100644 --- a/fanficdownloader/writers/writer_html.py +++ b/fanficdownloader/writers/writer_html.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import logging import string diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py index 6b5eaaec..6cbeba31 100644 --- a/fanficdownloader/writers/writer_txt.py +++ b/fanficdownloader/writers/writer_txt.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import logging import string from textwrap import wrap diff --git a/ffstorage.py b/ffstorage.py index a196487b..0f37d29a 100644 --- a/ffstorage.py +++ b/ffstorage.py @@ -1,3 +1,18 @@ +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + from google.appengine.ext import db class DownloadMeta(db.Model): diff --git a/index.html b/index.html index 0376cfa8..d6ce7712 100644 --- a/index.html +++ b/index.html @@ -56,7 +56,7 @@ This version is a new re-org/re-write of the code.

-    fictionalley.org and mediaminer.org aren't supported yet.
+    fictionalley.org still isn't supported yet.

    Login/Password is asked for when required now, as is 'Are you an Adult?' where required. @@ -153,6 +153,7 @@
    http://www.mediaminer.org/fanfic/view_st.php/166653. Or the story URL for one-shots, such as
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
    http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c
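Both URL forms are accepted because the adapter's single URL pattern matches either view_st.php or view_ch.php and captures the storyId from each. A minimal standalone sketch of that check (pattern as in adapter_mediaminerorg.py above; the test URLs are this page's examples):

import re

# Mirrors MediaMinerOrgSiteAdapter.getSiteURLPattern() above.
pattern = re.escape("http://www.mediaminer.org") + \
    r"/fanfic/view_(st|ch)\.php/(?P<id>\d+)(/\d+#fic_c)?$"

for url in ("http://www.mediaminer.org/fanfic/view_st.php/166653",
            "http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c"):
    m = re.match(pattern, url)
    print url, "->", m.group('id')  # both forms yield the storyId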

    adastrafanfic.com
    @@ -207,7 +208,7 @@ alt="Powered by Google App Engine" />

    FanfictionLoader is a web front-end to fanficdownloader
    - Copyright © Roman Kirillov + Copyright © Fanficdownloader team
    diff --git a/login.html b/login.html index a36d9193..bd316b9d 100644 --- a/login.html +++ b/login.html @@ -89,7 +89,7 @@ alt="Powered by Google App Engine" />

    FanfictionLoader is a web front-end to fanficdownloader
    - Copyright © Roman Kirillov + Copyright © Fanficdownloader team
    diff --git a/main.py b/main.py index c93c9afb..0aad9b5f 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # # Copyright 2007 Google Inc. +# Copyright 2011 Fanficdownloader team # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/newdownload.py b/newdownload.py index 46dc73dd..ee2adb00 100644 --- a/newdownload.py +++ b/newdownload.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import logging logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") diff --git a/status.html b/status.html index b8c22a57..2fb5805b 100644 --- a/status.html +++ b/status.html @@ -71,7 +71,7 @@ alt="Powered by Google App Engine" />

    FanfictionLoader is a web front-end to fanficdownloader
    - Copyright © Roman Kirillov + Copyright © Fanficdownloader team
    diff --git a/utils/remover.py b/utils/remover.py index d81fe85f..5162f1b2 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -1,10 +1,25 @@ #!/usr/bin/env python # encoding: utf-8 +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """ remover.py Created by Roman on 2010-06-20. -Copyright (c) 2010 __MyCompanyName__. All rights reserved. +Copyright 2011 Fanficdownloader team """ import datetime From a88d4cac5090f0a0605d98dfece0beaa07ec3043 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 11 May 2011 22:44:18 -0500 Subject: [PATCH 149/482] Support for fictionalley.org, add feature to not overwrite files newer than last update, fix to config section precedence, consolidate date parsing a bit. --- css/index.css | 4 +- defaults.ini | 50 ++++ .../adapters/adapter_adastrafanficcom.py | 7 +- .../adapters/adapter_fanfictionnet.py | 9 +- .../adapters/adapter_fictionalleyorg.py | 227 ++++++++++++++++++ .../adapters/adapter_fictionpresscom.py | 9 +- .../adapters/adapter_ficwadcom.py | 11 +- .../adapter_harrypotterfanfictioncom.py | 11 +- .../adapters/adapter_mediaminerorg.py | 9 +- .../adapters/adapter_potionsandsnitchesnet.py | 7 +- fanficdownloader/adapters/adapter_test1.py | 6 +- .../adapter_thewriterscoffeeshopcom.py | 7 +- .../adapters/adapter_twilightednet.py | 7 +- .../adapters/adapter_twiwritenet.py | 7 +- .../adapters/adapter_whoficcom.py | 7 +- fanficdownloader/adapters/base_adapter.py | 4 +- fanficdownloader/writers/base_writer.py | 30 ++- index.html | 12 +- main.py | 4 +- newdownload.py | 5 +- 20 files changed, 349 insertions(+), 84 deletions(-) create mode 100644 fanficdownloader/adapters/adapter_fictionalleyorg.py diff --git a/css/index.css b/css/index.css index 36c22034..d77f4246 100644 --- a/css/index.css +++ b/css/index.css @@ -5,8 +5,8 @@ body #main { - width: 43%; - margin-left: 23%; + width: 60%; + margin-left: 20%; background-color: #dae6ff; padding: 2em; } diff --git a/defaults.ini b/defaults.ini index 310008bc..8c26d9d7 100644 --- a/defaults.ini +++ b/defaults.ini @@ -77,6 +77,10 @@ include_tocpage: true output_filename: ${title}-${siteabbrev}_${storyId}${formatext} ## Make directories as needed. make_directories: true +## Always overwrite output files. Otherwise, the downloader checks +## the timestamp on the existing file and only overwrites if the story +## has been updated more recently. Command line version only +#always_overwrite: true ## put output (with output_filename) in a zip file zip_filename. zip_output: false @@ -164,14 +168,60 @@ extratags: #username:YourName #password:yourpassword +[www.twiwrite.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. 
+#username:YourName
+#password:yourpassword
+
 [www.whofic.com]
 
+[www.mediaminer.org]
+
+[www.thewriterscoffeeshop.com]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
+## Some sites also require the user to confirm they are adult for
+## adult content. In commandline version, this should go in your
+## personal.ini, not defaults.ini.
+#is_adult:true
+
+[www.ficwad.com]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
 [www.adastrafanfic.com]
 ## Some sites do not require a login, but do require the user to
 ## confirm they are adult for adult content. In commandline version,
 ## this should go in your personal.ini, not defaults.ini.
 #is_adult:true
 
+[www.fictionalley.org]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## fictionalley.org storyIds are not unique. Combine with authorId.
+output_filename: ${title}-${siteabbrev}_${authorId}_${storyId}${formatext}
+
+[www.harrypotterfanfiction.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
 [overrides]
 ## It may sometimes be useful to override all of the specific format,
 ## site and site:format sections in your private configuration. For
diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py
index adb4ac93..9cdbe637 100644
--- a/fanficdownloader/adapters/adapter_adastrafanficcom.py
+++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py
@@ -16,7 +16,6 @@
 #
 
 import time
-import datetime
 import logging
 import re
 import urllib
@@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs
 from fanficdownloader.htmlcleanup import stripHTML
 import fanficdownloader.exceptions as exceptions
 
-from base_adapter import BaseSiteAdapter, utf8FromSoup
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
 
 class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
 
@@ -161,12 +160,12 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
                 self.story.setMetadata('status', 'In-Progress')
 
             if 'Published' in label:
-                self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%m/%d/%Y"))))
+                self.story.setMetadata('datePublished', makeDate(value.strip(), "%m/%d/%Y"))
 
             if 'Updated' in label:
                 # there's a stray [ at the end.
#value = value[0:-1] - self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%m/%d/%Y")))) + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%m/%d/%Y")) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 3b96f547..39afe38b 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib2 @@ -25,7 +24,7 @@ import time import fanficdownloader.BeautifulSoup as bs import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class FanFictionNetSiteAdapter(BaseSiteAdapter): @@ -111,11 +110,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): if 'summary' in var: self.story.setMetadata('description', value) if 'datep' in var: - self.story.setMetadata('datePublished', - datetime.datetime.fromtimestamp(time.mktime(time.strptime(value, '%m-%d-%y')))) + self.story.setMetadata('datePublished',makeDate(value, '%m-%d-%y')) if 'dateu' in var: - self.story.setMetadata('dateUpdated', - datetime.datetime.fromtimestamp(time.mktime(time.strptime(value, '%m-%d-%y')))) + self.story.setMetadata('dateUpdated',makeDate(value, '%m-%d-%y')) if 'cat_title' in var: if "Crossover" in value: value = re.sub(r' Crossover$','',value) diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py new file mode 100644 index 00000000..f7c06ba1 --- /dev/null +++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fa') + self.decode = "ISO-8859-1" ## fa *lies*. It claims to be UTF8 in the headers, but it isn't. + self.story.addToList("category","Harry Potter") + self.is_adult=False + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('authorId',m.group('auth')) + self.story.setMetadata('storyId',m.group('id')) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + # normalized story URL. 
+            self._setURL(url)
+        else:
+            raise exceptions.InvalidStoryURL(url,
+                                             self.getSiteDomain(),
+                                             self.getSiteExampleURLs())
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.fictionalley.org'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return [cls.getSiteDomain()]
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/authors/drt/DA.html http://"+self.getSiteDomain()+"/authors/drt/JOTP01a.html"
+
+    def getSiteURLPattern(self):
+        # http://www.fictionalley.org/authors/drt/DA.html
+        # http://www.fictionalley.org/authors/drt/JOTP01a.html
+        return re.escape("http://"+self.getSiteDomain())+"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"
+
+    def _postFetchWithIAmOld(self,url):
+        if self.is_adult or self.getConfig("is_adult"):
+            params={'iamold':'Yes',
+                    'action':'ageanswer'}
+            logging.info("Attempting to get cookie for %s" % url)
+            ## posting on list doesn't work, but doesn't hurt, either.
+            data = self._postUrl(url,params)
+        else:
+            data = self._fetchUrl(url)
+        return data
+
+    def extractChapterUrlsAndMetadata(self):
+
+        ## could be either chapter list page or one-shot text page.
+        url = self.url
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._postFetchWithIAmOld(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        chapterdata = data
+        # If chapter list page, get the first chapter to look for adult check
+        chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
+        if chapterlinklist:
+            chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href'])
+
+        if "Are you over seventeen years old" in chapterdata:
+            raise exceptions.AdultCheckRequired(self.url)
+
+        if not chapterlinklist:
+            # no chapter list, chapter URL: change to list link.
+            # second a tag inside div breadcrumbs
+            storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
+            self._setURL(storya['href'])
+            url=self.url
+            logging.debug("Normalizing to URL: "+url)
+            ## title's right there...
+            self.story.setMetadata('title',storya.string)
+            data = self._fetchUrl(url)
+            soup = bs.BeautifulSoup(data)
+            chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
+        else:
+            ## still need title from somewhere. If chapterlinklist,
+            ## then chapterdata contains a chapter, find title the
+            ## same way.
+            chapsoup = bs.BeautifulSoup(chapterdata)
+            storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1]
+            self.story.setMetadata('title',storya.string)
+            del chapsoup
+
+        del chapterdata
+
+        ## authorid already set.
+        ##

    Just Off The Platform II by DrT

    + authora=soup.find('h1',{'class':'title'}).find('a') + self.story.setMetadata('author',authora.string) + self.story.setMetadata('authorUrl',authora['href']) + + print chapterlinklist + if len(chapterlinklist) == 1: + self.chapterUrls.append((self.story.getMetadata('title'),chapterlinklist[0]['href'])) + else: + # Find the chapters: + for chapter in chapterlinklist: + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Go scrape the rest of the metadata from the author's page. + data = self._fetchUrl(self.story.getMetadata('authorUrl')) + soup = bs.BeautifulSoup(data) + + #
    + # [Rid] The Magical Hottiez by Aafro Man Ziegod
    + #
    + # Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.
    + # + #
    + + storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')}) + storydd = storya.findNext('dd') + + # Rating: PG - Spoilers: None - 2525 hits - 736 words + # Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras + # Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on. + # Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004 + + ## change to text and regexp find. + metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ') + + m = re.match(r".*?Rating: (.+?) -.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genre: (.+?) -.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr) + if m: + self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y")) + + m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr) + if m: + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y")) + + m = re.match(r".*? (\d+) words Genre.*?",metastr) + if m: + self.story.setMetadata('numWords', m.group(1)) + + for small in storydd.findAll('small'): + small.extract() ## removes the tags, leaving only the summary. + self.story.setMetadata('description',stripHTML(storydd)) + + return + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # find & and + # replaced with matching div pair for easier parsing. + # Yes, it's an evil kludge, but what can ya do? Using + # something other than div prevents soup from pairing + # our div with poor html inside the story text. + data = data.replace('','').replace('','') + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + body = soup.findAll('body') ## some stories use a nested body and body + ## tag, in which case we don't + ## need crazytagstringnobodywouldstumbleonaccidently + ## and use the second one instead. + if len(body)>1: + text = body[1] + text.name='div' # force to be a div to avoid multiple body tags. + else: + text = soup.find('crazytagstringnobodywouldstumbleonaccidently', {'id' : 'storytext'}) + text.name='div' # change to div tag. + + if not data or not text: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return utf8FromSoup(text) + +def getClass(): + return FictionAlleyOrgSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py index 16d35e7e..578c4863 100644 --- a/fanficdownloader/adapters/adapter_fictionpresscom.py +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib2 @@ -25,7 +24,7 @@ import time import fanficdownloader.BeautifulSoup as bs import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class FictionPressComSiteAdapter(BaseSiteAdapter): @@ -124,10 +123,8 @@ class FictionPressComSiteAdapter(BaseSiteAdapter): m = re.match(r".*?Published: ([0-9-]+) - Updated: ([0-9-]+).*?",dataline) if m: published,updated = m.groups() - self.story.setMetadata('datePublished', - datetime.datetime.fromtimestamp(time.mktime(time.strptime(published, '%m-%d-%y')))) - self.story.setMetadata('dateUpdated', - datetime.datetime.fromtimestamp(time.mktime(time.strptime(updated, '%m-%d-%y')))) + self.story.setMetadata('datePublished',makeDate(published, '%m-%d-%y')) + self.story.setMetadata('dateUpdated',makeDate(updated, '%m-%d-%y')) # category, genres, then desc. # diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index eea4a540..791c07c2 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib2 @@ -27,7 +26,7 @@ import fanficdownloader.BeautifulSoup as bs import fanficdownloader.exceptions as exceptions from fanficdownloader.htmlcleanup import stripHTML -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class FicwadComSiteAdapter(BaseSiteAdapter): @@ -158,16 +157,12 @@ class FicwadComSiteAdapter(BaseSiteAdapter): m = re.match(r".*?Published: ([0-9/]+?) -.*?",metastr) if m: - self.story.setMetadata('datePublished', - datetime.datetime.fromtimestamp(\ - time.mktime(time.strptime(m.group(1), "%Y/%m/%d")))) + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y/%m/%d")) # Updated can have more than one space after it. m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metastr) if m: - self.story.setMetadata('dateUpdated', - datetime.datetime.fromtimestamp(\ - time.mktime(time.strptime(m.group(1), "%Y/%m/%d")))) + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y/%m/%d")) m = re.match(r".*? - ([0-9/]+?) 
words.*?",metastr) if m: diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py index 937c4278..6f1538d7 100644 --- a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib @@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): @@ -165,16 +164,12 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): m = re.match(r".*?First Published: ([0-9\.]+).*?",metastr) if m: - self.story.setMetadata('datePublished', - datetime.datetime.fromtimestamp(\ - time.mktime(time.strptime(m.group(1), "%Y.%m.%d")))) + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y.%m.%d")) # Updated can have more than one space after it. m = re.match(r".*?Last Updated: ([0-9\.]+).*?",metastr) if m: - self.story.setMetadata('dateUpdated', - datetime.datetime.fromtimestamp(\ - time.mktime(time.strptime(m.group(1), "%Y.%m.%d")))) + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y.%m.%d")) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py index 68c68d11..db30dc2d 100644 --- a/fanficdownloader/adapters/adapter_mediaminerorg.py +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib @@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class MediaMinerOrgSiteAdapter(BaseSiteAdapter): @@ -132,8 +131,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # if firstdate, then the block below will only have last updated. if firstdate: - self.story.setMetadata('datePublished', - datetime.datetime.fromtimestamp(time.mktime(time.strptime(firstdate, "%b %d, %Y")))) + self.story.setMetadata('datePublished', makeDate(firstdate, "%b %d, %Y")) # Everything else is in
    metastr = stripHTML(soup.find("tr",{"bgcolor":"#EEEED4"})).replace('\n',' ').replace('\r',' ').replace('\t',' ') @@ -141,8 +139,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # Latest Revision: August 03, 2010 m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d)",metastr) if m: - self.story.setMetadata('dateUpdated', - datetime.datetime.fromtimestamp(time.mktime(time.strptime(m.group(1), "%B %d, %Y")))) + self.story.setMetadata('dateUpdated', makeDate(m.group(1), "%B %d, %Y")) if not firstdate: self.story.setMetadata('datePublished', self.story.getMetadataRaw('dateUpdated')) diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py index aa7d074b..8becddc1 100644 --- a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib @@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): @@ -156,12 +155,12 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'In-Progress') if 'Published' in label: - self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%b %d %Y")))) + self.story.setMetadata('datePublished', makeDate(stripHTML(value), "%b %d %Y")) if 'Updated' in label: # there's a stray [ at the end. #value = value[0:-1] - self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%b %d %Y")))) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), "%b %d %Y")) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 369de457..f3d36037 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -21,7 +21,7 @@ import logging import fanficdownloader.BeautifulSoup as bs import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class TestSiteAdapter(BaseSiteAdapter): @@ -68,12 +68,12 @@ class TestSiteAdapter(BaseSiteAdapter): Some more longer description. "I suck at summaries!" "Better than it sounds!" 
"My first fic" ''') - self.story.setMetadata('datePublished',datetime.date(1972, 01, 31)) + self.story.setMetadata('datePublished',makeDate("1972-01-31","%Y-%m-%d")) self.story.setMetadata('dateCreated',datetime.datetime.now()) if self.story.getMetadata('storyId') == '669': self.story.setMetadata('dateUpdated',datetime.datetime.now()) else: - self.story.setMetadata('dateUpdated',datetime.date(1975, 01, 31)) + self.story.setMetadata('dateUpdated',makeDate("1975-01-31","%Y-%m-%d")) self.story.setMetadata('numChapters','5') self.story.setMetadata('numWords','123456') self.story.setMetadata('status','In-Completed') diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py index 4a6cbc43..f11d37e6 100644 --- a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib @@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): @@ -189,12 +188,12 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'In-Progress') if 'Published' in label: - self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%B %d, %Y")))) + self.story.setMetadata('datePublished', makeDate(stripHTML(value), "%B %d, %Y")) if 'Updated' in label: # there's a stray [ at the end. #value = value[0:-1] - self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(stripHTML(value), "%B %d, %Y")))) + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), "%B %d, %Y")) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index eb79ee47..6b716912 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib @@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class TwilightedNetSiteAdapter(BaseSiteAdapter): @@ -182,12 +181,12 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'In-Progress') if 'Published' in label: - self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y")))) + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) if 'Updated' in label: # there's a stray [ at the end. 
#value = value[0:-1] - self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y")))) + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py index d3e3f67b..14dc8582 100644 --- a/fanficdownloader/adapters/adapter_twiwritenet.py +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib @@ -26,7 +25,7 @@ import fanficdownloader.BeautifulSoup as bs from fanficdownloader.htmlcleanup import stripHTML import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class TwiwriteNetSiteAdapter(BaseSiteAdapter): @@ -196,12 +195,12 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter): self.story.setMetadata('status', 'In-Progress') if 'Published' in label: - self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y")))) + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) if 'Updated' in label: # there's a stray [ at the end. value = value[0:-1] - self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value.strip(), "%B %d, %Y")))) + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py index 89a35b0c..4f9c9c04 100644 --- a/fanficdownloader/adapters/adapter_whoficcom.py +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -16,7 +16,6 @@ # import time -import datetime import logging import re import urllib2 @@ -24,7 +23,7 @@ import urllib2 import fanficdownloader.BeautifulSoup as bs import fanficdownloader.exceptions as exceptions -from base_adapter import BaseSiteAdapter, utf8FromSoup +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate class WhoficComSiteAdapter(BaseSiteAdapter): @@ -165,9 +164,9 @@ class WhoficComSiteAdapter(BaseSiteAdapter): name=name.strip() value=value.strip() if name == 'Published': - self.story.setMetadata('datePublished', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value, '%Y.%m.%d')))) + self.story.setMetadata('datePublished', makeDate(value, '%Y.%m.%d')) if name == 'Updated': - self.story.setMetadata('dateUpdated', datetime.datetime.fromtimestamp(time.mktime(time.strptime(value, '%Y.%m.%d')))) + self.story.setMetadata('dateUpdated', makeDate(value, '%Y.%m.%d')) if name == 'Completed': if value == 'Yes': self.story.setMetadata('status', 'Completed') diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index f44e35b7..06118376 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -139,7 +139,9 @@ class BaseSiteAdapter(Configurable): "Needs to be overriden in each adapter class." pass - +def makeDate(string,format): + return datetime.datetime.strptime(string,format) + # this gives us a unicode object, not just a string containing bytes. # (I gave soup a unicode string, you'd think it could give it back...) 
def utf8FromSoup(soup): diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index fc014507..d81d7d0b 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -17,6 +17,7 @@ import re import os.path +import datetime import string import StringIO import zipfile @@ -36,12 +37,15 @@ class BaseStoryWriter(Configurable): def getFormatExt(): return '.bse' - def __init__(self, config, story): + def __init__(self, config, adapter): Configurable.__init__(self, config) + self.addConfigSection(adapter.getSiteDomain()) self.addConfigSection(self.getFormatName()) - ## Pass adapter instead, to check date before fetching all? - ## Or add 'check update' method to writer? - self.story = story + self.addConfigSection(adapter.getSiteDomain()+":"+self.getFormatName()) + self.addConfigSection("overrides") + + self.adapter = adapter + self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially. self.validEntries = [ 'category', 'genre', @@ -170,10 +174,6 @@ class BaseStoryWriter(Configurable): # if no outstream is given, write to file. def writeStory(self,outstream=None): - self.addConfigSection(self.story.getMetadata('site')) - self.addConfigSection(self.story.getMetadata('site')+":"+self.getFormatName()) - self.addConfigSection("overrides") - for tag in self.getConfigList("extratags"): self.story.addToList("extratags",tag) @@ -196,13 +196,23 @@ class BaseStoryWriter(Configurable): if not os.path.exists(path): os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? - ## Check for output file date vs updated date here? + ## Check for output file date vs updated date here + if not self.getConfig('always_overwrite'): + if os.path.exists(outfilename): + ## date() truncs off time, which files have, but sites don't report. + lastupdated=self.story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.datetime.fromtimestamp(os.stat(outfilename)[8]).date() + if fileupdated > lastupdated: + print "File(%s) Updated(%s) more recently than Story(%s) - Skipping" % (outfilename,fileupdated,lastupdated) + return + outstream = open(outfilename,"wb") else: close=False logging.debug("Save to stream") - + self.story = self.adapter.getStory() # get full story now, + # just before writing. if self.getConfig('zip_output'): out = StringIO.StringIO() self.writeStoryImpl(out) diff --git a/index.html b/index.html index d6ce7712..0acbc2a5 100644 --- a/index.html +++ b/index.html @@ -55,9 +55,6 @@

    This version is a new re-org/re-write of the code.

    -

-    fictionalley.org still isn't supported yet.
-

    Login/Password is asked for when required now, as is 'Are you an Adult?' where required.

    @@ -105,9 +102,10 @@
    fictionalley.org
    Use the URL of the story's chapter list, such as -
    http://www.fictionalley.org/authors/drt/DA.html. Or the story text URL for - fictionalley.org one-shots, such as +
    http://www.fictionalley.org/authors/drt/DA.html. +
    Or a chapter URL (or one-shot text), such as
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    Either will work for both chaptered and one-shot stories now.
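Both forms satisfy the adapter's one URL pattern, which captures the author directory and the story/chapter id; since fictionalley storyIds are not unique on their own, that pair is what the site's default output_filename combines. A minimal sketch (pattern spelled out literally, equivalent to the re.escape() construction in adapter_fictionalleyorg.py above):

import re

# Mirrors FictionAlleyOrgSiteAdapter.getSiteURLPattern() above.
pattern = r"http://www\.fictionalley\.org/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html"

for url in ("http://www.fictionalley.org/authors/drt/DA.html",
            "http://www.fictionalley.org/authors/drt/JOTP01a.html"):
    m = re.match(pattern, url)
    print url, "->", m.group('auth'), m.group('id')  # e.g. drt DA, drt JOTP01a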
    fanfiction.net
    @@ -151,8 +149,8 @@
    Use the URL of the story's chapter list, such as
    http://www.mediaminer.org/fanfic/view_st.php/166653. - Or the story URL for one-shots, such as -
    http://www.mediaminer.org/fanfic/view_st.php/167618. +
    Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618 or
    http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c
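Whichever form is given, only the storyId matters: the adapter normalizes a chapter URL back to the canonical view_st.php story URL before fetching metadata. Sketched standalone (rewrite as in MediaMinerOrgSiteAdapter.__init__() above; pattern spelled out literally):

import re

# A chapter URL is rewritten to the canonical story URL.
pattern = r"http://www\.mediaminer\.org/fanfic/view_(st|ch)\.php/(?P<id>\d+)(/\d+#fic_c)?$"
url = "http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c"
m = re.match(pattern, url)
print 'http://www.mediaminer.org/fanfic/view_st.php/' + m.group('id')
# prints http://www.mediaminer.org/fanfic/view_st.php/1234123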
    adastrafanfic.com
    diff --git a/main.py b/main.py index 0aad9b5f..98ddc3de 100644 --- a/main.py +++ b/main.py @@ -402,7 +402,7 @@ class FanfictionDownloaderTask(UserConfigServer): try: # adapter.getStory() is what does all the heavy lifting. - writer = writers.getWriter(format,config,adapter.getStory()) + writer = writers.getWriter(format,config,adapter) except Exception, e: logging.exception(e) download.failure = str(e) @@ -410,6 +410,8 @@ class FanfictionDownloaderTask(UserConfigServer): return download.name = writer.getOutputFileName() + logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) download.title = adapter.getStory().getMetadata('title') download.author = adapter.getStory().getMetadata('author') download.put() diff --git a/newdownload.py b/newdownload.py index ee2adb00..7d930f84 100644 --- a/newdownload.py +++ b/newdownload.py @@ -16,7 +16,8 @@ # import logging -logging.basicConfig(level=logging.DEBUG,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") +## XXX cli option for logging level. +logging.basicConfig(level=logging.WARN,format="%(levelname)s:%(filename)s(%(lineno)d):%(message)s") import sys, os from optparse import OptionParser @@ -31,7 +32,7 @@ from fanficdownloader import adapters,writers,exceptions import ConfigParser def writeStory(config,adapter,writeformat): - writer = writers.getWriter(writeformat,config,adapter.getStory()) + writer = writers.getWriter(writeformat,config,adapter) writer.writeStory() del writer From df9e8778657a19890f3beee87ea33974a42da837 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 13 May 2011 21:34:58 -0500 Subject: [PATCH 150/482] Fixes to various from first round testing and some code clean up. --- .../adapters/adapter_adastrafanficcom.py | 6 +- .../adapters/adapter_fictionalleyorg.py | 4 - .../adapters/adapter_fictionpresscom.py | 4 - .../adapters/adapter_ficwadcom.py | 9 +-- .../adapter_harrypotterfanfictioncom.py | 13 ++-- .../adapters/adapter_mediaminerorg.py | 77 +++++++++++++------ .../adapters/adapter_potionsandsnitchesnet.py | 7 +- .../adapter_thewriterscoffeeshopcom.py | 4 - .../adapters/adapter_twilightednet.py | 2 +- fanficdownloader/writers/writer_epub.py | 2 +- login.html | 2 +- newdownload.py | 2 +- 12 files changed, 73 insertions(+), 59 deletions(-) diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py index 9cdbe637..5bdc1c9b 100644 --- a/fanficdownloader/adapters/adapter_adastrafanficcom.py +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -32,7 +32,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) self.story.setMetadata('siteabbrev','aaff') - self.decode = "utf8" + self.decode = "ISO-8859-1" self.story.addToList("category","Star Trek") self.is_adult=False @@ -48,10 +48,6 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter): def getSiteDomain(): return 'www.adastrafanfic.com' - @classmethod - def getAcceptDomains(cls): - return [cls.getSiteDomain()] - def getSiteExampleURLs(self): return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py index f7c06ba1..594de50e 100644 --- a/fanficdownloader/adapters/adapter_fictionalleyorg.py +++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py @@ -53,10 +53,6 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): 
def getSiteDomain(): return 'www.fictionalley.org' - @classmethod - def getAcceptDomains(cls): - return [cls.getSiteDomain()] - def getSiteExampleURLs(self): return "http://"+self.getSiteDomain()+"/authors/drt/DA.html http://"+self.getSiteDomain()+"/authors/drt/JOTP01a.html" diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py index 578c4863..bc9c2905 100644 --- a/fanficdownloader/adapters/adapter_fictionpresscom.py +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -43,10 +43,6 @@ class FictionPressComSiteAdapter(BaseSiteAdapter): def getSiteDomain(): return 'www.fictionpress.com' - @classmethod - def getAcceptDomains(cls): - return ['www.fictionpress.com'] - def getSiteExampleURLs(self): return "http://www.fictionpress.com/s/1234/1/ http://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title" diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py index 791c07c2..27821994 100644 --- a/fanficdownloader/adapters/adapter_ficwadcom.py +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -44,10 +44,6 @@ class FicwadComSiteAdapter(BaseSiteAdapter): def getSiteDomain(): return 'www.ficwad.com' - @classmethod - def getAcceptDomains(cls): - return ['www.ficwad.com'] - def getSiteExampleURLs(self): return "http://www.ficwad.com/story/137169" @@ -137,8 +133,9 @@ class FicwadComSiteAdapter(BaseSiteAdapter): # warnings # [!!] [R] [V] [Y] spanreq = metap.find("span",{"class":"req"}) - for a in spanreq.findAll("a"): - self.story.addToList('warnings',a['title']) + if spanreq: # can be no warnings. + for a in spanreq.findAll("a"): + self.story.addToList('warnings',a['title']) ## perhaps not the most efficient way to parse this, using ## regexps for each rather than something more complex, but diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py index 6f1538d7..a24c63f4 100644 --- a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -32,7 +32,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) self.story.setMetadata('siteabbrev','hp') - self.decode = "ISO-8859-1" + self.decode = "Windows-1252" # Another site that lies to us. self.story.addToList("category","Harry Potter") self.is_adult=False @@ -115,7 +115,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): for chapter in tablelist.findAll('a', href=re.compile(r'\?chapterid=\d+')): #javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?chapterid=433441&i=1' # just in case there's tags, like in chapter titles. - chpt=re.sub(r'^.*?(\?chapterid=433441).*?',r'\1',chapter['href']) + chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href']) self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php'+chpt)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -178,12 +178,13 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. 
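
Aside on the harrypotterfanfiction.com change above: the old regexp had a sample chapter id (433441) baked into the pattern, so it only ever matched that one chapter; for any other chapterid the sub was a no-op and the href went through untrimmed. \d+ makes it generic. A quick sketch of the fixed substitution (the href value here is hypothetical, not captured from the site):

import re

href = "viewstory.php?chapterid=433441&i=1"  # hypothetical chapter link
chpt = re.sub(r'^.*?(\?chapterid=\d+).*?', r'\1', href)
# chpt == '?chapterid=433441&i=1' -- the leading path is stripped and the
# query kept, ready to be glued onto 'http://'+host+'/viewstory.php'.
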
- span = soup.find('div', {'id' : 'fluidtext'}) + div = soup.find('div', {'id' : 'fluidtext'}) - if None == span: + if None == div: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - - return utf8FromSoup(span) + + print div + return utf8FromSoup(div) def getClass(): return HarryPotterFanFictionComSiteAdapter diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py index db30dc2d..273bdf78 100644 --- a/fanficdownloader/adapters/adapter_mediaminerorg.py +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -32,7 +32,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) self.story.setMetadata('siteabbrev','mm') - self.decode = "utf8" + self.decode = "ISO-8859-1" # get storyId from url--url validation guarantees query correct m = re.match(self.getSiteURLPattern(),url) @@ -50,10 +50,6 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): def getSiteDomain(): return 'www.mediaminer.org' - @classmethod - def getAcceptDomains(cls): - return [cls.getSiteDomain()] - def getSiteExampleURLs(self): return "http://"+self.getSiteDomain()+"/fanfic/view_st.php/123456 http://"+self.getSiteDomain()+"/fanfic/view_ch.php/1234123/123444#fic_c" @@ -61,7 +57,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): ## http://www.mediaminer.org/fanfic/view_st.php/76882 ## http://www.mediaminer.org/fanfic/view_ch.php/167618/594087#fic_c return re.escape("http://"+self.getSiteDomain())+\ - "/fanfic/view_(st|ch)\.php/"+r"(?P\d+)(/\d+#fic_c)?$" + "/fanfic/view_(st|ch)\.php/"+r"(?P\d+)(/\d+(#fic_c)?)?$" def extractChapterUrlsAndMetadata(self): @@ -79,18 +75,26 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # use BeautifulSoup HTML parser to make everything easier to find. soup = bs.BeautifulSoup(data) - ## Title - title = soup.find('title').string - ## MediaMiner - Fan Fic: Par Tout Autre Nom - ## MediaMiner: Question and Answer ( One-Shot ) - ## MediaMiner: Moaning to Wake the Dead ( Chapter 1 ) - title = re.match(r'^MediaMiner(?: - Fan Fic)?:(.*?)(?: \( .*? \))?$',title).group(1) - - # [ A - All Readers ], strip '[ ' ' ]' + # [ A - All Readers ], strip '[' ']' + ## Above title because we remove the smtxt font to get title. rating = soup.find("font",{"class":"smtxt"}).string[1:-1] - self.story.setMetadata('title',title) self.story.setMetadata('rating',rating) + ## Title - Good grief. Title varies by chaptered, 1chapter and 'type=one shot'. + ##
    + ## + ## + ## + ## + title = soup.find('td',{'class':'ffh'}) + for font in title.findAll('font'): + font.extract() # removes 'font' tags from inside the td. + if title.has_key('colspan') or 'src.php/t/ONE_SHOT' in data: + titlet = title.text + else: + titlet = ':'.join(title.text.split(':')[:-1]) # strip trailing 'Chapter X', but only when no colspan and not one-shot + self.story.setMetadata('title',titlet) + # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"/fanfic/src.php/u/\d+")) self.story.setMetadata('authorId',a['href'].split('/')[-1]) @@ -103,7 +107,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # Find the chapters select = soup.find('select',{'name':'cid'}) if not select: - self.chapterUrls.append((title,self.url)) + self.chapterUrls.append(( self.story.getMetadata('title'),self.url)) else: for option in select.findAll("option"): chapter = stripHTML(option.string) @@ -135,7 +139,6 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # Everything else is in metastr = stripHTML(soup.find("tr",{"bgcolor":"#EEEED4"})).replace('\n',' ').replace('\r',' ').replace('\t',' ') - print metastr # Latest Revision: August 03, 2010 m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d)",metastr) if m: @@ -171,21 +174,45 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): logging.debug('Getting chapter text from: %s' % url) - soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + data=self._fetchUrl(url) + soup = bs.BeautifulStoneSoup(data, selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. anchor = soup.find('a',{'name':'fic_c'}) if None == anchor: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - - for div in anchor.findAllNext('div',{'align':'left'}): - div.name='p' # convert to

<p>, mediaminer uses div with a - # margin for paragraphs. - anchor.append(div) # cheat! stuff all the content divs - # into anchor just as a holder. + + ## find divs with align=left, those are paragraphs in newer stories. + divlist = anchor.findAllNext('div',{'align':'left'}) + if divlist: + for div in divlist: + div.name='p' # convert to <p>, mediaminer uses div with + # a margin for paragraphs. + anchor.append(div) # cheat! stuff all the content + # divs into anchor just as a + # holder. + del div['style'] + del div['align'] + anchor.name='div' + return utf8FromSoup(anchor) + + else: + logging.debug('Using kludgey text find for older mediaminer story.') + ## Some older mediaminer stories are unparsable with BeautifulSoup. + ## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first. + ## Story stuff falls between: + data = "<html><body>" + data[data.find('<a name="fic_c">'):] +"</body></html>
    " + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + for tag in soup.findAll('td',{'class':'ffh'}) + \ + soup.findAll('div',{'class':'acl'}) + \ + soup.findAll('div',{'class':'footer smtxt'}) + \ + soup.findAll('table',{'class':'tbbrdr'}): + tag.extract() # remove tag from soup. + + return utf8FromSoup(soup) - return utf8FromSoup(anchor) def getClass(): return MediaMinerOrgSiteAdapter diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py index 8becddc1..5d898b34 100644 --- a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -32,7 +32,7 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) self.story.setMetadata('siteabbrev','pns') - self.decode = "utf8" + self.decode = "ISO-8859-1" self.story.addToList("category","Harry Potter") # get storyId from url--url validation guarantees query is only sid=1234 @@ -171,6 +171,11 @@ class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. span = soup.find('div', {'id' : 'story'}) + for p in span.findAll('p'): + if p.has_key('style'): + del p['style'] + if p.has_key('class'): + del p['class'] if None == span: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py index f11d37e6..0c1d857a 100644 --- a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -49,10 +49,6 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): def getSiteDomain(): return 'www.thewriterscoffeeshop.com' - @classmethod - def getAcceptDomains(cls): - return [cls.getSiteDomain()] - def getSiteExampleURLs(self): return "http://"+self.getSiteDomain()+"/library/viewstory.php?sid=1234" diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py index 6b716912..96d838de 100644 --- a/fanficdownloader/adapters/adapter_twilightednet.py +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -32,7 +32,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter): def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) self.story.setMetadata('siteabbrev','tw') - self.decode = "utf8" + self.decode = "ISO-8859-1" ## tw *lies*. It claims to be UTF8 in the headers, but it isn't. "utf8" self.story.addToList("category","Twilight") self.username = "NoneGiven" # if left empty, site doesn't return any message at all. self.password = "" diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index 7a23ce97..28bab789 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -144,7 +144,7 @@ h6 { text-align: center; } -

    ${chapter}

    +

    ${chapter}

    ''') self.EPUB_CHAPTER_END = string.Template(''' diff --git a/login.html b/login.html index bd316b9d..6bbf5f28 100644 --- a/login.html +++ b/login.html @@ -2,7 +2,7 @@ - Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + Login Needed Fanfiction Downloader + + + +
    @@ -27,12 +31,12 @@
    + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + + +
    +

    Edit Config

    +
    + Editing configuration for {{ nickname }}. +
    +
    + +
    +
    + +
    + +
    + + +
    +

    Default System configuration

    +
    +{{ defaultsini }}
    +
    +
    + +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Fanficdownloader team +
    + +
    + + +
    +
    + + diff --git a/epubmerge.py b/epubmerge.py new file mode 100644 index 00000000..a9c51933 --- /dev/null +++ b/epubmerge.py @@ -0,0 +1,378 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import re +#import StringIO +from optparse import OptionParser + +import zlib +import zipfile +from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED +from time import time + +from exceptions import KeyError + +from xml.dom.minidom import parse, parseString, getDOMImplementation + +def main(argv): + # read in args, anything starting with -- will be treated as --= + usage = "usage: %prog [options] [...]" + parser = OptionParser(usage) + parser.add_option("-o", "--output", dest="outputopt", default="merge.epub", + help="Set OUTPUT file, Default: merge.epub", metavar="OUTPUT") + parser.add_option("-t", "--title", dest="titleopt", default=None, + help="Use TITLE as the metadata title. Default: ' Anthology'", metavar="TITLE") + parser.add_option("-d", "--description", dest="descopt", default=None, + help="Use DESC as the metadata description. Default: ' by ' for each epub.", metavar="DESC") + parser.add_option("-a", "--author", + action="append", dest="authoropts", default=[], + help="Use AUTHOR as a metadata author, multiple authors may be given, Default: ", metavar="AUTHOR") + parser.add_option("-f", "--first", + action="store_true", dest="fromfirst", default=False, + help="Take all metadata from first input epub",) + parser.add_option("-n", "--titles-in-toc", + action="store_true", dest="titlenavpoints", + help="Put an entry in the TOC for each epub, in addition to each epub's chapters.",) + parser.add_option("-s", "--strip-title-toc", + action="store_true", dest="striptitletoc", + help="Strip any title_page.xhtml and toc_page.xhtml files.",) + + (options, args) = parser.parse_args() + + ## Add .epub if not already there. + if not options.outputopt.lower().endswith(".epub"): + options.outputopt=options.outputopt+".epub" + + print "output file: "+options.outputopt + doMerge(options.outputopt, + args, + options.authoropts, + options.titleopt, + options.descopt, + options.fromfirst, + options.titlenavpoints, + options.striptitletoc) + + # output = StringIO.StringIO() + # files = [] + # for file in args: + # f = open(file,"rb") + # fio = StringIO.StringIO(f.read()) + # f.close() + # files.append(fio) + + # doMerge(output,files,authoropts,titleopt,descopt,fromfirst,titlenavpoints,striptitletoc) + + # out = open(outputopt,"wb") + # out.write(output.getvalue()) + +def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None, + fromfirst=False, + titlenavpoints=True, + striptitletoc=False, + forceunique=True): + ''' + outputio = output file name or StringIO. + files = list of input file names or StringIOs. 
+ authoropts = list of authors to use, otherwise add from all input + titleopt = title, otherwise ' Anthology' + descopt = description, otherwise ' by <author>' list for all input + fromfirst if true, take all metadata (including author, title, desc) from first input + titlenavpoints if true, put in a new TOC entry for each epub + striptitletoc if true, strip out any (title|toc)_page.xhtml files + forceunique if true, guarantee uniqueness of contents by adding a dir for each input + ''' + ## Python 2.5 ZipFile is rather more primative than later + ## versions. It can operate on a file, or on a StringIO, but + ## not on an open stream. OTOH, I suspect we would have had + ## problems with closing and opening again to change the + ## compression type anyway. + + filecount=0 + source=None + + ## Write mimetype file, must be first and uncompressed. + ## Older versions of python(2.4/5) don't allow you to specify + ## compression by individual file. + ## Overwrite if existing output file. + outputepub = ZipFile(outputio, "w", compression=ZIP_STORED) + outputepub.debug = 3 + outputepub.writestr("mimetype", "application/epub+zip") + outputepub.close() + + ## Re-open file for content. + outputepub = ZipFile(outputio, "a", compression=ZIP_DEFLATED) + outputepub.debug = 3 + + ## Create META-INF/container.xml file. The only thing it does is + ## point to content.opf + containerdom = getDOMImplementation().createDocument(None, "container", None) + containertop = containerdom.documentElement + containertop.setAttribute("version","1.0") + containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container") + rootfiles = containerdom.createElement("rootfiles") + containertop.appendChild(rootfiles) + rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf", + "media-type":"application/oebps-package+xml"})) + outputepub.writestr("META-INF/container.xml",containerdom.toprettyxml(indent=' ',encoding='utf-8')) + + ## Process input epubs. + + items = [] # list of (id, href, type) tuples(all strings) -- From .opfs' manifests + items.append(("ncx","toc.ncx","application/x-dtbncx+xml")) ## we'll generate the toc.ncx file, + ## but it needs to be in the items manifest. + itemrefs = [] # list of strings -- idrefs from .opfs' spines + navmaps = [] # list of navMap DOM elements -- TOC data for each from toc.ncx files + + booktitles = [] # list of strings -- Each book's title + allauthors = [] # list of lists of strings -- Each book's list of authors. + + filelist = [] + + booknum=1 + firstmetadom = None + for file in files: + if file == None : continue + + book = "%d" % booknum + bookdir = "" + bookid = "" + if forceunique: + bookdir = "%d/" % booknum + bookid = "a%d" % booknum + #print "book %d" % booknum + + epub = ZipFile(file, 'r') + + ## Find the .opf file. + container = epub.read("META-INF/container.xml") + containerdom = parseString(container) + rootfilenodelist = containerdom.getElementsByTagName("rootfile") + rootfilename = rootfilenodelist[0].getAttribute("full-path") + + ## Save the path to the .opf file--hrefs inside it are relative to it. 
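
To make the container.xml indirection above concrete, a minimal sketch (with a made-up OEBPS/ layout) of how the root file path turns manifest item hrefs into zip paths:

import os
from xml.dom.minidom import parseString

container = parseString('<container><rootfiles>'
                        '<rootfile full-path="OEBPS/content.opf"'
                        ' media-type="application/oebps-package+xml"/>'
                        '</rootfiles></container>')
rootfilename = container.getElementsByTagName("rootfile")[0].getAttribute("full-path")
relpath = os.path.dirname(rootfilename)  # 'OEBPS'
if len(relpath) > 0:
    relpath = relpath+"/"
# an <item href="chapter1.xhtml"> in that .opf lives at OEBPS/chapter1.xhtml:
print relpath + "chapter1.xhtml"
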
+ relpath = os.path.dirname(rootfilename) + if( len(relpath) > 0 ): + relpath=relpath+"/" + + metadom = parseString(epub.read(rootfilename)) + if booknum==1: + firstmetadom = metadom.getElementsByTagName("metadata")[0] + try: + source=firstmetadom.getElementsByTagName("dc:source")[0].firstChild.data.encode("utf-8") + except: + source="" + #print "Source:%s"%source + + ## Save indiv book title + booktitles.append(metadom.getElementsByTagName("dc:title")[0].firstChild.data) + + ## Save authors. + authors=[] + for creator in metadom.getElementsByTagName("dc:creator"): + if( creator.getAttribute("opf:role") == "aut" ): + authors.append(creator.firstChild.data) + allauthors.append(authors) + + for item in metadom.getElementsByTagName("item"): + if( item.getAttribute("media-type") == "application/x-dtbncx+xml" ): + # TOC file is only one with this type--as far as I know. + # grab the whole navmap, deal with it later. + tocdom = parseString(epub.read(relpath+item.getAttribute("href"))) + + for navpoint in tocdom.getElementsByTagName("navPoint"): + navpoint.setAttribute("id",bookid+navpoint.getAttribute("id")) + + for content in tocdom.getElementsByTagName("content"): + content.setAttribute("src",bookdir+relpath+content.getAttribute("src")) + + navmaps.append(tocdom.getElementsByTagName("navMap")[0]) + else: + id=bookid+item.getAttribute("id") + href=bookdir+relpath+item.getAttribute("href") + href=href.encode('utf8') + #print "href:"+href + if not striptitletoc or not re.match(r'.*/(title|toc)_page\.xhtml', + item.getAttribute("href")): + if href not in filelist: + try: + outputepub.writestr(href, + epub.read(relpath+item.getAttribute("href"))) + if re.match(r'.*/(file|chapter)\d+\.xhtml',href): + filecount+=1 + items.append((id,href,item.getAttribute("media-type"))) + filelist.append(href) + except KeyError, ke: + pass # Skip missing files. + + for itemref in metadom.getElementsByTagName("itemref"): + if not striptitletoc or not re.match(r'(title|toc)_page', itemref.getAttribute("idref")): + itemrefs.append(bookid+itemref.getAttribute("idref")) + + booknum=booknum+1; + + ## create content.opf file. + uniqueid="epubmerge-uid-%d" % time() # real sophisticated uid scheme. + contentdom = getDOMImplementation().createDocument(None, "package", None) + package = contentdom.documentElement + if fromfirst and firstmetadom: + metadata = firstmetadom + firstpackage = firstmetadom.parentNode + package.setAttribute("version",firstpackage.getAttribute("version")) + package.setAttribute("xmlns",firstpackage.getAttribute("xmlns")) + package.setAttribute("unique-identifier",firstpackage.getAttribute("unique-identifier")) + else: + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","epubmerge-id") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + metadata.appendChild(newTag(contentdom,"dc:identifier",text=uniqueid,attrs={"id":"epubmerge-id"})) + if( titleopt is None ): + titleopt = booktitles[0]+" Anthology" + metadata.appendChild(newTag(contentdom,"dc:title",text=titleopt)) + + # If cmdline authors, use those instead of those collected from the epubs + # (allauthors kept for TOC & description gen below. 
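
Two notes on the author handling that follows. First, the len(authoropts) > 1 test means a single -a AUTHOR on the command line still falls through to the authors collected from the epubs; > 0 looks like what was meant. Second, the dict keeps each dc:creator unique while the loop order preserves first appearance; a stripped-down sketch of that dedup idiom with hypothetical input:

allauthors = [["A. Author"], ["B. Writer", "A. Author"]]  # hypothetical
usedauthors = dict()
ordered = []
for authorlist in allauthors:
    for author in authorlist:
        if not usedauthors.has_key(author):
            usedauthors[author] = author
            ordered.append(author)
# ordered == ['A. Author', 'B. Writer'] -- each author credited once.
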
+ if( len(authoropts) > 1 ): + useauthors=[authoropts] + else: + useauthors=allauthors + + usedauthors=dict() + for authorlist in useauthors: + for author in authorlist: + if( not usedauthors.has_key(author) ): + usedauthors[author]=author + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=author)) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="epubmerge",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="Copyrights as per source stories")) + metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + + if not descopt: + # created now, but not filled in until TOC generation to save loops. + description = newTag(contentdom,"dc:description",text="Anthology containing:\n") + else: + description = newTag(contentdom,"dc:description",text=descopt) + metadata.appendChild(description) + + package.appendChild(metadata) + + manifest = contentdom.createElement("manifest") + package.appendChild(manifest) + for item in items: + (id,href,type)=item + manifest.appendChild(newTag(contentdom,"item", + attrs={'id':id, + 'href':href, + 'media-type':type})) + + spine = newTag(contentdom,"spine",attrs={"toc":"ncx"}) + package.appendChild(spine) + for itemref in itemrefs: + spine.appendChild(newTag(contentdom,"itemref", + attrs={"idref":itemref, + "linear":"yes"})) + + ## create toc.ncx file + tocncxdom = getDOMImplementation().createDocument(None, "ncx", None) + ncx = tocncxdom.documentElement + ncx.setAttribute("version","2005-1") + ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/") + head = tocncxdom.createElement("head") + ncx.appendChild(head) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:uid", "content":uniqueid})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:depth", "content":"1"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:totalPageCount", "content":"0"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:maxPageNumber", "content":"0"})) + + docTitle = tocncxdom.createElement("docTitle") + docTitle.appendChild(newTag(tocncxdom,"text",text=titleopt)) + ncx.appendChild(docTitle) + + tocnavMap = tocncxdom.createElement("navMap") + ncx.appendChild(tocnavMap) + + ## TOC navPoints can be nested, but this flattens them for + ## simplicity, plus adds a navPoint for each epub. 
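
The flattening below leans on a minidom property worth spelling out: getElementsByTagName returns every descendant navPoint in document order, nested or not, so walking that list visits the whole tree already flattened. A toy example with a hypothetical nested TOC:

from xml.dom.minidom import parseString

toc = parseString('<navMap>'
                  '<navPoint id="a"><navPoint id="a1"/></navPoint>'
                  '<navPoint id="b"/>'
                  '</navMap>')
ids = [n.getAttribute("id") for n in toc.getElementsByTagName("navPoint")]
# ids == [u'a', u'a1', u'b'] -- nesting gone, document order kept.
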
+ booknum=0 + for navmap in navmaps: + navpoints = navmap.getElementsByTagName("navPoint") + if titlenavpoints: + ## Copy first navPoint of each epub, give a different id and + ## text: bookname by authorname + newnav = navpoints[0].cloneNode(True) + newnav.setAttribute("id","book"+newnav.getAttribute("id")) + ## For purposes of TOC titling & desc, use first book author + newtext = newTag(tocncxdom,"text",text=booktitles[booknum]+" by "+allauthors[booknum][0]) + text = newnav.getElementsByTagName("text")[0] + text.parentNode.replaceChild(newtext,text) + tocnavMap.appendChild(newnav) + + if not descopt and not fromfirst: + description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n")) + + for navpoint in navpoints: + #print "navpoint:%s"%navpoint.getAttribute("id") + if not striptitletoc or not re.match(r'(title|toc)_page',navpoint.getAttribute("id")): + tocnavMap.appendChild(navpoint) + booknum=booknum+1; + + ## Force strict ordering of playOrder + playorder=1 + for navpoint in tocncxdom.getElementsByTagName("navPoint"): + navpoint.setAttribute("playOrder","%d" % playorder) + if( not navpoint.getAttribute("id").startswith("book") ): + playorder = playorder + 1 + + ## content.opf written now due to description being filled in + ## during TOC generation to save loops. + outputepub.writestr("content.opf",contentdom.toxml('utf-8')) + outputepub.writestr("toc.ncx",tocncxdom.toxml('utf-8')) + + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 + outputepub.close() + + return (source,filecount) + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/example.ini b/example.ini new file mode 100644 index 00000000..cf63ac6b --- /dev/null +++ b/example.ini @@ -0,0 +1,34 @@ +## This is an example of what your personal configuration might look +## like. + +## Most common, I expect will be using this to save username/passwords +## for different sites. +[www.twilighted.net] +#username:YourPenname +#password:YourPassword + +[www.ficwad.com] +#username:YourUsername +#password:YourPassword + +[www.adastrafanfic.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. +#is_adult:true + +## The [defaults] section here will override the system [defaults], +## but not format, site for site:format sections. +[defaults] +## Directories only useful in commandline or zip files. +#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext} +#output_filename: books/${site}/${authorId}/${title}-${storyId}${formatext} + +## For example, zip_output here will turn on zip for html and txt, but +## not epub because the system [epub] section explicitly says +## zip_output: false (epubs *are* specially formated zip files.) +#zip_output: true +#zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip + +## This section will override anything in the system defaults or other +## sections here. 
+[overrides] diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..4b17b853 --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. 
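
The observable effect of that stitching, as a small usage sketch (import path assumed to work the way the adapters in this series use the module, as bs): after extract(), walking .next behaves as if the removed subtree had never been parsed.

from fanficdownloader import BeautifulSoup as bs

soup = bs.BeautifulSoup('<p>one <b>two</b> three</p>')
soup.find('b').extract()
print soup.find(text='one ').next  # u' three' -- the neighbours are joined up
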
+ lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+ break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
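
A short usage sketch of the find*/sibling/parent family defined above (again assuming the fanficdownloader import path):

from fanficdownloader import BeautifulSoup as bs

soup = bs.BeautifulSoup('<ul><li>one</li><li>two</li><li>three</li></ul>')
first = soup.find('li')
print first.findNextSibling('li').string                  # u'two'
print [li.string for li in first.findNextSiblings('li')]  # [u'two', u'three']
print first.findParent('ul').name                         # u'ul'
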
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "<?%s?>" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!--%s-->" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!%s>" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
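
The entity regexp used just below has three branches; pulled out as a standalone sketch (convert is a local stand-in for the _convertEntities method, without the class's configuration flags), it reads:

import re
from htmlentitydefs import name2codepoint

def convert(match):
    x = match.group(1)
    if x in name2codepoint:        # named entity, e.g. &eacute;
        return unichr(name2codepoint[x])
    if x.startswith('#x'):         # hex numeric entity, e.g. &#xe9;
        return unichr(int(x[2:], 16))
    if x.startswith('#'):          # decimal numeric entity, e.g. &#233;
        return unichr(int(x[1:]))
    return u'&%s;' % x             # unrecognized: leave it escaped

result = re.sub(r"&(#\d+|#x[0-9a-fA-F]+|\w+);", convert, u"caf&eacute; &#233; &#xe9;")
# result == u'caf\xe9 \xe9 \xe9' -- all three forms become the same character.
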
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
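
What that escaping pass does, sketched with the regexp defined above (the entity map is trimmed here to the three characters this regexp can actually match): brackets and bare ampersands get escaped, while already-encoded entities pass through untouched.

import re

BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
                                       + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                       + ")")
ENTITIES = {"<": "lt", ">": "gt", "&": "amp"}

def sub_entity(x):
    return "&" + ENTITIES[x.group(0)[0]] + ";"

val = 'Tom & Jerry &amp; friends <s>'
print BARE_AMPERSAND_OR_BRACKET.sub(sub_entity, val)
# Tom &amp; Jerry &amp; friends &lt;s&gt;
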
+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+ elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "<foo><bar></foo>" actually means + "<foo><bar></bar></foo>". + + [Another possible explanation is "<foo><bar /></foo>", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda x: '<!' + x.group(1) + '>') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. 
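+    # (Illustrative: u' \t\r\n '.translate(STRIP_ASCII_SPACES) == u'', so
+    # endData() collapses such a node to a single '\n' or ' ', while a
+    # non-breaking space u'\xa0' comes through translate() unchanged and
+    # the node is preserved.)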
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
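+        # (Flow sketch: UnicodeDammit decodes the raw bytes, the optional
+        # markupMassage regexes repair common breakage, and SGMLParser.feed()
+        # then drives the handler methods below.  A custom pass uses the same
+        # shape as MARKUP_MASSAGE, e.g.
+        #   BeautifulSoup(markup, markupMassage=[
+        #       (re.compile('(<[^<>]*)/>'), lambda x: x.group(1) + ' />')])
+        # -- illustrative only.)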
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
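+        (Illustrative: with a tag stack of [document, 'a', 'b', 'c'],
+        _popToTag('a') pops 'c', 'b', and then 'a'.)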
If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        #print "Popping to %s" % name
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers is not None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers is None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s: %s" % (name, attrs)
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!"
% name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. 
Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            data = self.rawdata[i+9:k]
+            j = k+3
+            self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (e.g. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurrence of
+      a <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurrence
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+       but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table></tr><tr>Blah
+
+    Differing assumptions about tag nesting rules are a major source
+    of problems with the BeautifulSoup class. If BeautifulSoup is not
+    treating as nestable a tag your page author treats as nestable,
+    try ICantBelieveItsBeautifulSoup, MinimalSoup, or
+    BeautifulStoneSoup before writing your own subclass."""
+
+    def __init__(self, *args, **kwargs):
+        if not kwargs.has_key('smartQuotesTo'):
+            kwargs['smartQuotesTo'] = self.HTML_ENTITIES
+        kwargs['isHTML'] = True
+        BeautifulStoneSoup.__init__(self, *args, **kwargs)
+
+    SELF_CLOSING_TAGS = buildTagMap(None,
+                                    ('br' , 'hr', 'input', 'img', 'meta',
+                                    'spacer', 'link', 'frame', 'base', 'col'))
+
+    PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
+
+    QUOTE_TAGS = {'script' : None, 'textarea' : None}
+
+    #According to the HTML standard, each of these inline tags can
+    #contain another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
+                            'center')
+
+    #According to the HTML standard, these block tags can contain
+    #another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
+
+    #Lists can contain other lists, but there are restrictions.
+    NESTABLE_LIST_TAGS = { 'ol' : [],
+                           'ul' : [],
+                           'li' : ['ul', 'ol'],
+                           'dl' : [],
+                           'dd' : ['dl'],
+                           'dt' : ['dl'] }
+
+    #Tables can contain other tables, but there are restrictions.
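+    #(Reading the map below: a new <td> implicitly closes a previous <td>
+    #only within the same <tr>; crossing one of its listed reset parents,
+    #e.g. a new <table> for <tr>, stops the search in _smartPop, which is
+    #how nested tables stay nested.)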
+ NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. 
This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + <script> tags contain Javascript and should not be parsed, that + META tags may contain encoding information, and so on. + + This also makes it better for subclassing than BeautifulStoneSoup + or BeautifulSoup.""" + + RESET_NESTING_TAGS = buildTagMap('noscript') + NESTABLE_TAGS = {} + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + <foo><bar>baz</bar></foo> + => + <foo bar="baz"><bar>baz</bar></foo> + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisiness, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class RobustInsanelyWackAssHTMLParser(MinimalSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +###################################################### +# +# Bonus library: Unicode, Dammit +# +# This class forces XML data into a standard format (usually to UTF-8 +# or Unicode). It is heavily based on code from Mark Pilgrim's +# Universal Feed Parser. It does not rewrite the XML or HTML to +# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi +# (XML) and BeautifulSoup.start_meta (HTML). + +# Autodetects character encodings. 
+# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. +try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, orig): + """Changes a MS smart quote character to an XML or HTML + entity.""" + sub = self.MS_CHARS.get(orig) + if isinstance(sub, tuple): + if self.smartQuotesTo == 'xml': + sub = '&#x%s;' % sub[1] + else: + sub = '&%s;' % sub[0] + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + markup = re.compile("([\x80-\x9f])").sub \ + (lambda(x): self._subMSChar(x.group(1)), + markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. 
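+        (Illustrative: _toUnicode('\xc3\xa9t\xc3\xa9', 'utf-8') == u'\xe9t\xe9',
+        and a leading byte-order mark, handled below, overrides the
+        caller's encoding.)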
+ %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_match = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + if not xml_encoding_match and isHTML: + regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + 
codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + '\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify() diff --git a/fanficdownloader/__init__.py b/fanficdownloader/__init__.py new file mode 100644 index 00000000..40a96afc --- /dev/null +++ b/fanficdownloader/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py new file mode 100644 index 00000000..0800ca4c --- /dev/null +++ b/fanficdownloader/adapters/__init__.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
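+#
+# (Usage sketch, illustrative, using names defined in this module:
+#
+#   adapter = getAdapter(config, "www.fanfiction.net/s/1234/1/")
+#
+# getAdapter() normalizes the URL, picks the registered adapter class
+# whose matchesSite() accepts the domain, and returns an instance of it;
+# it raises exceptions.UnknownSite when no registered adapter matches.)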
+# + +import os, re, sys, glob +from os.path import dirname, basename, normpath +import logging +import urlparse as up + +import fanficdownloader.exceptions as exceptions + +## This bit of complexity allows adapters to be added by just adding +## the source file. It eliminates the long if/else clauses we used to +## need to pick out the adapter. + +## List of registered site adapters. + +__class_list = [] + +def getAdapter(config,url): + ## fix up leading protocol. + fixedurl = re.sub(r"(?i)^[htp]+[:/]+","http://",url.strip()) + if not fixedurl.startswith("http"): + fixedurl = "http://%s"%url + ## remove any trailing '#' locations. + fixedurl = re.sub(r"#.*$","",fixedurl) + + ## remove any trailing '&' parameters--?sid=999 will be left. + ## that's all that any of the current adapters need or want. + fixedurl = re.sub(r"&.*$","",fixedurl) + + parsedUrl = up.urlparse(fixedurl) + domain = parsedUrl.netloc.lower() + if( domain != parsedUrl.netloc ): + fixedurl = fixedurl.replace(parsedUrl.netloc,domain) + + logging.debug("site:"+domain) + cls = getClassFor(domain) + if not cls: + logging.debug("trying site:www."+domain) + cls = getClassFor("www."+domain) + fixedurl = fixedurl.replace("http://","http://www.") + if cls: + adapter = cls(config,fixedurl) # raises InvalidStoryURL + return adapter + # No adapter found. + raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] ) + +def getClassFor(domain): + for cls in __class_list: + if cls.matchesSite(domain): + return cls + +## Automatically import each adapter_*.py file. +## Each implement getClass() to their class + +filelist = glob.glob(dirname(__file__)+'/adapter_*.py') +sys.path.insert(0,normpath(dirname(__file__))) + +for file in filelist: + #print "file: "+basename(file)[:-3] + module = __import__(basename(file)[:-3]) + __class_list.append(module.getClass()) + +del sys.path[0] diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py new file mode 100644 index 00000000..be600c6e --- /dev/null +++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class AdAstraFanficComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','aaff') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. 
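+        # (The decode list is assumed to be tried in order by the base
+        # adapter when it decodes fetched pages; listing windows-1252 first
+        # copes with pages that mislabel their charset.)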
+        self.story.addToList("category","Star Trek")
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.adastrafanfic.com'
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            addurl = "&warning=5"
+        else:
+            addurl=""
+
+        url = self.url+'&index=1'+addurl
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
+            raise exceptions.AdultCheckRequired(self.url)
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like <i> in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## <meta name='description' content='<p>Description</p> ...' >
+        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
+        ## which is escaped HTML. Unfortunately, we can't use it because they don't
+        ## escape (') chars in the desc, breaking the tag.
+        #meta_desc = soup.find('meta',{'name':'description'})
+        #metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
+        #self.story.setMetadata('description',stripHTML(metasoup))
+
+        def defaultGetattr(d,k):
+            try:
+                return d[k]
+            except:
+                return ""
+
+        # <span class="label">Rated:</span> NC-17<br /> etc
+        labels = soup.findAll('span',{'class':'label'})
+        for labelspan in labels:
+            value = labelspan.nextSibling
+            label = labelspan.string
+
+            if 'Summary' in label:
+                ## Everything until the next span class='label'
+                svalue = ''
+                while value and not defaultGetattr(value,'class') == 'label':
+                    svalue += str(value)
+                    value = value.nextSibling
+                # sometimes poorly formatted desc (<p> w/o </p>) leads
+                # to all labels being included.
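+                # (Truncating at the next '<span class="label">' below keeps
+                # only the summary's own text in that case.)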
+ svalue=svalue[:svalue.find('<span class="label">')] + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%m/%d/%Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%m/%d/%Y")) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return AdAstraFanficComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_fanficcastletvnet.py b/fanficdownloader/adapters/adapter_fanficcastletvnet.py new file mode 100644 index 00000000..96d5d1b2 --- /dev/null +++ b/fanficdownloader/adapters/adapter_fanficcastletvnet.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +# By virtue of being recent and requiring both is_adult and user/pass, +# adapter_fanficcastletvnet.py is the best choice for learning to +# write adapters--especially for sites that use the eFiction system. +# Most sites that have ".../viewstory.php?sid=123" in the story URL +# are eFiction. + +# For non-eFiction sites, it can be considerably more complex, but +# this is still a good starting point. 
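+
+# (Registration sketch: the adapters package auto-imports every
+# adapter_*.py file and calls its module-level getClass(), so a new
+# adapter only needs the getClass()/class pair defined below.)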
+
+# In general an 'adapter' needs to do these six things:
+
+# - 'Register' correctly with the downloader
+# - Site Login (if needed)
+# - 'Are you adult?' check (if needed--some do one, some the other, some both)
+# - Grab the chapter list
+# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
+# - Grab the chapter texts
+
+# Search for XXX comments--that's where things are most likely to need changing.
+
+# This function is called by the downloader in all adapter_*.py files
+# in this dir to register the adapter class. So it needs to be
+# updated to reflect the class below it. That, plus getSiteDomain()
+# take care of 'Registering'.
+def getClass():
+    return FanficCastleTVNetAdapter # XXX
+
+# Class name has to be unique. Our convention is camel case the
+# sitename with Adapter at the end. www is skipped.
+class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+        self.password = ""
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+        # Each adapter needs to have a unique site abbreviation.
+        self.story.setMetadata('siteabbrev','csltv') # XXX
+
+        # If all stories from the site fall into the same category,
+        # the site itself isn't likely to label them as such, so we
+        # do.
+        self.story.addToList("category","Castle") # XXX
+
+        # The date format will vary from site to site.
+        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+        self.dateformat = "%b %d, %Y" # XXX
+
+    @staticmethod # must be @staticmethod, don't remove it.
+    def getSiteDomain():
+        # The site domain. Include the www here if the site uses it.
+        return 'fanfic.castletv.net' # XXX
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+    ## Login seems to be reasonably standard across eFiction sites.
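+    ## (Flow sketch: extractChapterUrlsAndMetadata() below fetches the page,
+    ## and when needToLoginCheck(data) is true it calls performLogin(url),
+    ## which POSTs the configured penname/password to user.php?action=login.)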
+    def needToLoginCheck(self, data):
+        if 'Registered Users Only' in data \
+                or 'There is no such account on our website' in data \
+                or "That password doesn't match the one in our database" in data:
+            return True
+        else:
+            return False
+
+    def performLogin(self, url):
+        params = {}
+
+        if self.password:
+            params['penname'] = self.username
+            params['password'] = self.password
+        else:
+            params['penname'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+        params['cookiecheck'] = '1'
+        params['submit'] = 'Submit'
+
+        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
+        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+                                                              params['penname']))
+
+        d = self._fetchUrl(loginUrl, params)
+
+        if "Member Account" not in d : #Member Account
+            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+                                                              params['penname']))
+            raise exceptions.FailedToLogin(url,params['penname'])
+            return False
+        else:
+            return True
+
+    ## Getting the chapter list and the meta data, plus 'is adult' checking.
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            # Weirdly, different sites use different warning numbers.
+            # If the title search below fails, there's a good chance
+            # you need a different number. print data at that point
+            # and see what the 'click here to continue' url says.
+            addurl = "&ageconsent=ok&warning=4" # XXX
+        else:
+            addurl=""
+
+        # index=1 makes sure we see the story chapter index. Some
+        # sites skip that for one-chapter stories.
+        url = self.url+'&index=1'+addurl
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if self.needToLoginCheck(data):
+            # need to log in for this one.
+            self.performLogin(url)
+            data = self._fetchUrl(url)
+
+        # The actual text that is used to announce you need to be an
+        # adult varies from site to site. Again, print data before
+        # the title search to troubleshoot.
+        if "Age Consent Required" in data: # XXX
+            raise exceptions.AdultCheckRequired(self.url)
+
+        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+        # print data
+
+        # Now go hunting for all the meta data and the chapter list.
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like <i> in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        # eFiction sites don't help us out a lot with their meta data
+        # formatting, so it's a little ugly.
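+        # (The detail block being scraped here is assumed to look like:
+        #    <span class="label">Rated:</span> NC-17<br />
+        #    <span class="label">Word count:</span> 1234<br />
+        # so each labelspan.string names a field and labelspan.nextSibling
+        # holds its value.)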
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + ## Not all sites use Genre, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + ## Not all sites use Warnings, but there's no harm to + ## leaving it in. Check to make sure the type_id number + ## is correct, though--it's site specific. + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(div) diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py new file mode 100644 index 00000000..42e2b88e --- /dev/null +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -0,0 +1,228 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 +import time + +import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class FanFictionNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','ffnet') + + # get storyId from url--url validation guarantees second part is storyId + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + + # normalized story URL. + self._setURL("http://"+self.getSiteDomain()\ + +"/s/"+self.story.getMetadata('storyId')+"/1/") + + # ffnet update emails have the latest chapter URL. + # Frequently, when they arrive, not all the servers have the + # latest chapter yet and going back to chapter 1 to pull the + # chapter list doesn't get the latest. So save and use the + # original URL given to pull chapter list & metadata. + self.origurl = url + if "http://m." in self.origurl: + ## accept m(mobile)url, but use www. + self.origurl = self.origurl.replace("http://m.","http://www.") + + @staticmethod + def getSiteDomain(): + return 'www.fanfiction.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.fanfiction.net','m.fanfiction.net'] + + def getSiteExampleURLs(self): + return "http://www.fanfiction.net/s/1234/1/ http://www.fanfiction.net/s/1234/12/ http://www.fanfiction.net/s/1234/1/Story_Title" + + def getSiteURLPattern(self): + return r"http://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$" + + def extractChapterUrlsAndMetadata(self): + + # fetch the chapter. From that we will get almost all the + # metadata and chapter list + + url = self.origurl + logging.debug("URL: "+url) + + # use BeautifulSoup HTML parser to make everything easier to find. + try: + data = self._fetchUrl(url) + soup = bs.BeautifulSoup(data) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(url) + else: + raise e + + if "Unable to locate story with id of " in data: + raise exceptions.StoryDoesNotExist(url) + + if "Chapter not found. Please check to see you are not using an outdated url." in data: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"^/u/\d+")) + self.story.setMetadata('authorId',a['href'].split('/')[2]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + + # start by finding a script towards the bottom that has a + # bunch of useful stuff in it. + + # var storyid = 6577076; + # var chapter = 1; + # var chapters = 17; + # var words = 42787; + # var userid = 2645830; + # var title = 'The+Invitation'; + # var title_t = 'The Invitation'; + # var summary = 'Dudley Dursley would be the first to say he lived a very normal life. But what happens when he gets invited to his cousin Harry Potter\'s wedding? Will Dudley get the courage to apologize for the torture he caused all those years ago? 
Harry/Ginny story.'; + # var categoryid = 224; + # var cat_title = 'Harry Potter'; + # var datep = '12-21-10'; + # var dateu = '04-06-11'; + # var author = 'U n F a b u l o u s M e'; + + for script in soup.findAll('script', src=None): + if not script: + continue + if not script.string: + continue + if 'var storyid' in script.string: + for line in script.string.split('\n'): + m = re.match(r"^ +var ([^ ]+) = '?(.*?)'?;$",line) + if m == None : continue + var,value = m.groups() + # remove javascript escaping from values. + value = re.sub(r'\\(.)',r'\1',value) + #print var,value + if 'words' in var: + self.story.setMetadata('numWords', value) + if 'title_t' in var: + self.story.setMetadata('title', value) + if 'summary' in var: + self.story.setMetadata('description', value) + if 'datep' in var: + self.story.setMetadata('datePublished',makeDate(value, '%m-%d-%y')) + if 'dateu' in var: + self.story.setMetadata('dateUpdated',makeDate(value, '%m-%d-%y')) + if 'cat_title' in var: + if "Crossover" in value: + value = re.sub(r' Crossover$','',value) + for c in value.split(' and '): + self.story.addToList('category',c) + # Screws up when the category itself + # contains ' and '. But that's rare + # and the only alternative is to find + # the 'Crossover' category URL and + # parse that page to search for <a> + # with href /crossovers/(name)/(num)/ + # <a href="/crossovers/Harry_Potter/224/">Harry Potter</a> + # <a href="/crossovers/Naruto/1402/">Naruto</a> + else: + self.story.addToList('category',value) + break # for script in soup.findAll('script', src=None): + + # Find the chapter selector + select = soup.find('select', { 'name' : 'chapter' } ) + + if select is None: + # no selector found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = u'http://%s/s/%s/%s/' % ( self.getSiteDomain(), + self.story.getMetadata('storyId'), + o['value']) + # just in case there's tags, like <i> in chapter titles. + title = u"%s" % o + title = re.sub(r'<[^>]+>','',title) + self.chapterUrls.append((title,url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Pull some additional data from html. Find Rating and look around it. + + a = soup.find('a', href='http://www.fictionratings.com/') + self.story.setMetadata('rating',a.string) + + # after Rating, the same bit of text containing id:123456 contains + # Complete--if completed. + if 'Complete' in a.findNext(text=re.compile(r'id:'+self.story.getMetadata('storyId'))): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + # Parse genre(s) from <meta name="description" content="..." + # <meta name="description" content="Chapter 1 of a Harry Potter - Family/Friendship fanfiction. Dudley Dursley would be the first to say he lived a very normal life. But what happens when he gets invited to his cousin Harry Potter's wedding? Will Dudley get the courage to apologize for the torture he caused all those years ago? Harry/Ginny story.."> + # <meta name="description" content="A Gundam Wing/AC and Gundam Seed - Romance/Sci-Fi crossover fanfiction with characters: & Kira Y.. Story summary: One-Shoot dividido en dos partes. Kira va en camino a rescatar a Lacus, pero él no es el unico. Dos personajes de diferentes universos Gundams. SEED vs ZERO."> + # <meta name="description" content="Chapter 1 of a Alvin and the chipmunks and Alpha and Omega crossover fanfiction with characters: Alvin S. 
& Humphrey. You'll just have to read to find out... No Flames Plesae... and tell me what you want to see by PM'ing me....">
+        # genre is after first -, but before first 'fanfiction'.
+        m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P<genres>.*?)) (?:crossover )?fanfiction",
+                     soup.find('meta',{'name':'description'})['content'])
+        if m != None:
+            genres=m.group('genres')
+            # Hurt/Comfort is one genre.
+            genres=re.sub('Hurt/Comfort','Hurt-Comfort',genres)
+            for g in genres.split('/'):
+                self.story.addToList('genre',g)
+
+        return
+
+
+    def getChapterText(self, url):
+        logging.debug('Getting chapter text from: %s' % url)
+        time.sleep(0.5) ## ffnet (and, I assume, fpcom) tends to fail
+                        ## more if hit too fast.  This is in
+                        ## addition to whatever the
+                        ## slow_down_sleep_time setting is.
+        data = self._fetchUrl(url)
+        soup = bs.BeautifulSoup(data)
+
+        ## Remove the 'share' button.
+        sharediv = soup.find('div', {'class' : 'a2a_kit a2a_default_style'})
+        if sharediv:
+            sharediv.extract()
+        else:
+            logging.debug('share button div not found')
+
+        div = soup.find('div', {'id' : 'storytext'})
+
+        if None == div:
+            logging.debug('div id=storytext not found.  data:%s'%data)
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)
+
+        return utf8FromSoup(div)
+
+def getClass():
+    return FanFictionNetSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py
new file mode 100644
index 00000000..14a03ec5
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py
@@ -0,0 +1,226 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib
+import urllib2
+
+import fanficdownloader.BeautifulSoup as bs
+from fanficdownloader.htmlcleanup import stripHTML
+import fanficdownloader.exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fa')
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+        self.story.addToList("category","Harry Potter")
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query correct
+        m = re.match(self.getSiteURLPattern(),url)
+        if m:
+            self.story.setMetadata('authorId',m.group('auth'))
+            self.story.setMetadata('storyId',m.group('id'))
+            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            # normalized story URL.
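+            ## Worked example (URL shape taken from getSiteExampleURLs below):
+            ##   http://www.fictionalley.org/authors/drt/JOTP01a.html
+            ## matches with auth -> 'drt' and id -> 'JOTP01a'.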
+ self._setURL(url) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + @staticmethod + def getSiteDomain(): + return 'www.fictionalley.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/authors/drt/DA.html http://"+self.getSiteDomain()+"/authors/drt/JOTP01a.html" + + def getSiteURLPattern(self): + # http://www.fictionalley.org/authors/drt/DA.html + # http://www.fictionalley.org/authors/drt/JOTP01a.html + return re.escape("http://"+self.getSiteDomain())+"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html" + + def _postFetchWithIAmOld(self,url): + if self.is_adult or self.getConfig("is_adult"): + params={'iamold':'Yes', + 'action':'ageanswer'} + logging.info("Attempting to get cookie for %s" % url) + ## posting on list doesn't work, but doesn't hurt, either. + data = self._postUrl(url,params) + else: + data = self._fetchUrl(url) + return data + + def extractChapterUrlsAndMetadata(self): + + ## could be either chapter list page or one-shot text page. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._postFetchWithIAmOld(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + chapterdata = data + # If chapter list page, get the first chapter to look for adult check + chapterlinklist = soup.findAll('a',{'class':'chapterlink'}) + if chapterlinklist: + chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href']) + + if "Are you over seventeen years old" in chapterdata: + raise exceptions.AdultCheckRequired(self.url) + + if not chapterlinklist: + # no chapter list, chapter URL: change to list link. + # second a tag inside div breadcrumbs + storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1] + self._setURL(storya['href']) + url=self.url + logging.debug("Normalizing to URL: "+url) + ## title's right there... + self.story.setMetadata('title',storya.string) + data = self._fetchUrl(url) + soup = bs.BeautifulSoup(data) + chapterlinklist = soup.findAll('a',{'class':'chapterlink'}) + else: + ## still need title from somewhere. If chapterlinklist, + ## then chapterdata contains a chapter, find title the + ## same way. + chapsoup = bs.BeautifulSoup(chapterdata) + storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1] + self.story.setMetadata('title',storya.string) + del chapsoup + + del chapterdata + + ## authorid already set. + ## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1> + authora=soup.find('h1',{'class':'title'}).find('a') + self.story.setMetadata('author',authora.string) + self.story.setMetadata('authorUrl',authora['href']) + + if len(chapterlinklist) == 1: + self.chapterUrls.append((self.story.getMetadata('title'),chapterlinklist[0]['href'])) + else: + # Find the chapters: + for chapter in chapterlinklist: + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Go scrape the rest of the metadata from the author's page. 
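+        ## (This costs a second fetch, but the author page lists rating,
+        ## genre, dates, word count and summary in a single <dd> block --
+        ## see the sample markup below.)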
+        data = self._fetchUrl(self.story.getMetadata('authorUrl'))
+        soup = bs.BeautifulSoup(data)
+
+        # <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
+        # [Rid] The Magical Hottiez</a> by <a class = "pen_name" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/">Aafro Man Ziegod</a> </small></dt>
+        # <dd><small class = "storyinfo"><a href = "http://www.fictionalley.org/ratings.html" target = "_new">Rating:</a> PG-13 - Spoilers: PS/SS, CoS, PoA, GoF, QTTA, FB - 4264 hits - 5060 words<br />
+        # Genre: Humor, Romance - Main character(s): None - Ships: None - Era: Multiple Eras<br /></small>
+        # Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.<br />
+        # <small class = "storyinfo">Published: June 3, 2002 (between Goblet of Fire and Order of Phoenix) - Updated: June 3, 2002</small>
+        # </dd></dl>
+
+        storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')})
+        storydd = storya.findNext('dd')
+
+        # Rating: PG - Spoilers: None - 2525 hits - 736 words
+        # Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras
+        # Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on.
+        # Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004
+
+        ## change to text and regexp find.
+        metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ')
+
+        m = re.match(r".*?Rating: (.+?) -.*?",metastr)
+        if m:
+            self.story.setMetadata('rating', m.group(1))
+
+        m = re.match(r".*?Genre: (.+?) -.*?",metastr)
+        if m:
+            for g in m.group(1).split(','):
+                self.story.addToList('genre',g)
+
+        m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
+        if m:
+            self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y"))
+
+        m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
+        if m:
+            self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y"))
+
+        m = re.match(r".*? (\d+) words Genre.*?",metastr)
+        if m:
+            self.story.setMetadata('numWords', m.group(1))
+
+        for small in storydd.findAll('small'):
+            small.extract() ## removes the <small> tags, leaving only the summary.
+        self.story.setMetadata('description',stripHTML(storydd))
+
+        return
+
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        data = self._fetchUrl(url)
+        # Find <!-- headerend --> & <!-- footerstart --> and
+        # replace them with a matching tag pair for easier parsing.
+        # Yes, it's an evil kludge, but what can ya do?  Using
+        # something other than div prevents soup from pairing
+        # our tag with poor html inside the story text.
+        data = data.replace('<!-- headerend -->','<crazytagstringnobodywouldstumbleonaccidently id="storytext">').replace('<!-- footerstart -->','</crazytagstringnobodywouldstumbleonaccidently>')
+
+        soup = bs.BeautifulStoneSoup(data,
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+        body = soup.findAll('body') ## some stories have a nested second body
+                                    ## tag, in which case we don't need
+                                    ## crazytagstringnobodywouldstumbleonaccidently
+                                    ## and use the second body instead.
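+        ## Sketch of the two shapes handled below (markup assumed, not
+        ## taken from any particular story):
+        ##   <body>...<body>story text</body></body>  -> use body[1]
+        ##   header<crazytagstringnobodywouldstumbleonaccidently id="storytext">
+        ##       story text
+        ##   </crazytagstringnobodywouldstumbleonaccidently>footer  -> use the fake tag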
+ if len(body)>1: + text = body[1] + text.name='div' # force to be a div to avoid multiple body tags. + else: + text = soup.find('crazytagstringnobodywouldstumbleonaccidently', {'id' : 'storytext'}) + text.name='div' # change to div tag. + + if not data or not text: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(text) + +def getClass(): + return FictionAlleyOrgSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py new file mode 100644 index 00000000..76b2353a --- /dev/null +++ b/fanficdownloader/adapters/adapter_fictionpresscom.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 +import time + +## They're from the same people and pretty much identical. +from adapter_fanfictionnet import FanFictionNetSiteAdapter + +class FictionPressComSiteAdapter(FanFictionNetSiteAdapter): + + def __init__(self, config, url): + FanFictionNetSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fpcom') + + @staticmethod + def getSiteDomain(): + return 'www.fictionpress.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.fictionpress.com','m.fictionpress.com'] + + def getSiteExampleURLs(self): + return "http://www.fictionpress.com/s/1234/1/ http://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title" + + def getSiteURLPattern(self): + return r"http://(www|m)?\.fictionpress\.com/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$" + +def getClass(): + return FictionPressComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py new file mode 100644 index 00000000..27821994 --- /dev/null +++ b/fanficdownloader/adapters/adapter_ficwadcom.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +import re +import urllib2 +import time +import httplib, urllib + +import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions +from fanficdownloader.htmlcleanup import stripHTML + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class FicwadComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','fw') + + # get storyId from url--url validation guarantees second part is storyId + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + + self.username = "NoneGiven" + self.password = "" + + @staticmethod + def getSiteDomain(): + return 'www.ficwad.com' + + def getSiteExampleURLs(self): + return "http://www.ficwad.com/story/137169" + + def getSiteURLPattern(self): + return re.escape(r"http://"+self.getSiteDomain())+"/story/\d+?$" + + def performLogin(self,url): + params = {} + + if self.password: + params['username'] = self.username + params['password'] = self.password + else: + params['username'] = self.getConfig("username") + params['password'] = self.getConfig("password") + + loginUrl = 'http://' + self.getSiteDomain() + '/account/login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['username'])) + d = self._postUrl(loginUrl,params) + + if "Login attempt failed..." in d: + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['username'])) + raise exceptions.FailedToLogin(url,params['username']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + # fetch the chapter. From that we will get almost all the + # metadata and chapter list + + url = self.url + logging.debug("URL: "+url) + + # use BeautifulSoup HTML parser to make everything easier to find. + try: + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + h3 = soup.find('h3') + storya = h3.find('a',href=re.compile("^/story/\d+$")) + if storya : # if there's a story link in the h3 header, this is a chapter page. + # normalize story URL on chapter list. + self.story.setMetadata('storyId',storya['href'].split('/',)[2]) + url = "http://"+self.getSiteDomain()+storya['href'] + logging.debug("Normalizing to URL: "+url) + self._setURL(url) + try: + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # if blocked, attempt login. + if soup.find("li",{"class":"blocked"}): + if self.performLogin(url): # performLogin raises + # FailedToLogin if it fails. + soup = bs.BeautifulSoup(self._fetchUrl(url)) + + # title - first h4 tag will be title. + titleh4 = soup.find('h4') + self.story.setMetadata('title', titleh4.a.string) + + # Find authorid and URL from... author url. 
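+        ## e.g. (hypothetical markup, shape inferred from the regexp):
+        ##   <a href="/author/12345">SomeAuthor</a>
+        ## '/author/12345'.split('/')[2] gives authorId '12345'.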
+        a = soup.find('a', href=re.compile(r"^/author/\d+"))
+        self.story.setMetadata('authorId',a['href'].split('/')[2])
+        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # description
+        storydiv = soup.find("div",{"id":"story"})
+        self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
+
+        # most of the meta data is here:
+        metap = storydiv.find("p",{"class":"meta"})
+        self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)
+
+        # warnings
+        # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
+        spanreq = metap.find("span",{"class":"req"})
+        if spanreq: # can be no warnings.
+            for a in spanreq.findAll("a"):
+                self.story.addToList('warnings',a['title'])
+
+        ## perhaps not the most efficient way to parse this, using
+        ## regexps for each rather than something more complex, but
+        ## IMO, it's more readable and amenable to change.
+        metastr = stripHTML(str(metap)).replace('\n',' ').replace('\t',' ')
+        #print "metap: (%s)"%metastr
+
+        m = re.match(r".*?Rating: (.+?) -.*?",metastr)
+        if m:
+            self.story.setMetadata('rating', m.group(1))
+
+        m = re.match(r".*?Genres: (.+?) -.*?",metastr)
+        if m:
+            for g in m.group(1).split(','):
+                self.story.addToList('genre',g)
+
+        m = re.match(r".*?Published: ([0-9/]+?) -.*?",metastr)
+        if m:
+            self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y/%m/%d"))
+
+        # Updated can have more than one space after it. <shrug>
+        m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metastr)
+        if m:
+            self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y/%m/%d"))
+
+        m = re.match(r".*? - ([0-9/]+?) words.*?",metastr)
+        if m:
+            self.story.setMetadata('numWords',m.group(1))
+
+        if metastr.endswith("Complete"):
+            self.story.setMetadata('status', 'Completed')
+        else:
+            self.story.setMetadata('status', 'In-Progress')
+
+        # get the chapter list first this time because that's how we
+        # detect the need to login.
+        storylistul = soup.find('ul',{'id':'storylist'})
+        if not storylistul:
+            # no list found, so it's a one-chapter story.
+            self.chapterUrls.append((self.story.getMetadata('title'),url))
+        else:
+            chapterlistlis = storylistul.findAll('li')
+            for chapterli in chapterlistlis:
+                if "blocked" in chapterli['class']:
+                    # paranoia check. We should already be logged in by now.
+                    raise exceptions.FailedToLogin(url,self.username)
+                else:
+                    #print "chapterli.h4.a (%s)"%chapterli.h4.a
+                    self.chapterUrls.append((chapterli.h4.a.string,
+                                             u'http://%s%s'%(self.getSiteDomain(),
+                                                             chapterli.h4.a['href'])))
+        #print "self.chapterUrls:%s"%self.chapterUrls
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        return
+
+
+    def getChapterText(self, url):
+        logging.debug('Getting chapter text from: %s' % url)
+        time.sleep(0.5) ## The site tends to fail more if hit too fast.
+                        ## This is in addition to whatever the
+                        ## slow_down_sleep_time setting is.
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        span = soup.find('div', {'id' : 'storytext'})
+
+        if None == span:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!"
% url)
+
+        return utf8FromSoup(span)
+
+def getClass():
+    return FicwadComSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py
new file mode 100644
index 00000000..07391845
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fimfictionnet.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+import cookielib as cl
+
+import fanficdownloader.BeautifulSoup as bs
+from fanficdownloader.htmlcleanup import stripHTML
+import fanficdownloader.exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+def getClass():
+    return FimFictionNetSiteAdapter
+
+class FimFictionNetSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fimficnet')
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+        self._setURL("http://"+self.getSiteDomain()+"/story/"+self.story.getMetadata('storyId')+"/")
+        self.is_adult = False
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.fimfiction.net'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        # mobile.fimfiction.com isn't actually a valid domain, but we can still get the story id from such URLs anyway
+        return ['www.fimfiction.net','mobile.fimfiction.net', 'www.fimfiction.com', 'mobile.fimfiction.com']

+    def getSiteExampleURLs(self):
+        return "http://www.fimfiction.net/story/1234/story-title-here http://www.fimfiction.net/story/1234/ http://www.fimfiction.com/story/1234/1/ http://mobile.fimfiction.net/story/1234/1/story-title-here/chapter-title-here"
+
+    def getSiteURLPattern(self):
+        return r"http://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            cookieproc = urllib2.HTTPCookieProcessor()
+            cookie = cl.Cookie(version=0, name='view_mature', value='true',
+                               port=None, port_specified=False,
+                               domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False,
+                               path='/story', path_specified=True,
+                               secure=False,
+                               expires=time.time()+10000,
+                               discard=False,
+                               comment=None,
+                               comment_url=None,
+                               rest={'HttpOnly': None},
+                               rfc2109=False)
+            cookieproc.cookiejar.set_cookie(cookie)
+            self.opener = urllib2.build_opener(cookieproc)
+
+        try:
+            data = self._fetchUrl(self.url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
+            raise exceptions.StoryDoesNotExist(self.url)
+
+        if "This story has been marked as having adult content."
in data:
+            raise exceptions.AdultCheckRequired(self.url)
+
+        soup = bs.BeautifulSoup(data).find("div", {"class":"content_box post_content_box"})
+
+        title, author = [link.text for link in soup.find("h2").findAll("a")]
+        self.story.setMetadata("title", title)
+        self.story.setMetadata("author", author)
+        self.story.setMetadata("authorId", author) # The author's name will be unique
+        self.story.setMetadata("authorUrl", "http://%s/user/%s" % (self.getSiteDomain(),author))
+
+        chapterDates = []
+
+        for chapter in soup.findAll("a", {"class":"chapter_link"}):
+            chapterDates.append(chapter.span.extract().text.strip("()"))
+            self.chapterUrls.append((chapter.text.strip(), "http://"+self.getSiteDomain() + chapter['href']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        for character in [character_icon['title'] for character_icon in soup.findAll("a", {"class":"character_icon"})]:
+            self.story.addToList("characters", character)
+        for category in [category.text for category in soup.find("div", {"class":"categories"}).findAll("a")]:
+            self.story.addToList("category", category)
+        self.story.addToList("category", "My Little Pony")
+
+
+        # The very last list element in the list of chapters contains the status, rating and word count, e.g.:
+        #
+        # <li>
+        #     Incomplete | Rating:
+        #     <span style="color:#c78238;">Teen</span>
+        #     <div class="word_count"><b>5,203</b>words total</div>
+        # </li>
+        #
+
+        status_bar = soup.findAll('li')[-1]
+        # In the case of fimfiction.net, possible statuses are 'Completed', 'Incomplete', 'On Hiatus' and 'Cancelled'.
+        # For the sake of bringing it in line with the other adapters, 'Incomplete' and 'On Hiatus' become 'In-Progress'
+        # and 'Complete' becomes 'Completed'. 'Cancelled' seems an important enough (not to mention more strictly true)
+        # status to leave unchanged.
+        status = status_bar.text.split("|")[0].strip().replace("Incomplete", "In-Progress").replace("On Hiatus", "In-Progress").replace("Complete", "Completed")
+        self.story.setMetadata('status', status)
+        self.story.setMetadata('rating', status_bar.span.text)
+        self.story.setMetadata('numWords', status_bar.div.b.text)
+
+        description_soup = soup.find("div", {"class":"description"})
+        # Sometimes the description has an expanding element.
+        # This removes the ellipsis and the expand button.
+        try:
+            description_soup.find('span', {"id":re.compile(r"description_more_elipses_\d+")}).extract() # Web designer can't spell 'ellipsis'
+            description_soup.find('a', {"class":"more"}).extract()
+        except:
+            pass
+        self.story.setMetadata('description', description_soup.text)
+
+        # Unfortunately, nowhere on the page is the year mentioned. Because we would much rather update the story needlessly
+        # than miss an update, we hardcode the year of creation and update to be 2011.
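+        ## Worked example (date format assumed from the parsing below):
+        ## a chapter date span like '(4th Dec)' was stripped to '4th Dec'
+        ## above, so day -> '4th' -> '4', month -> 'Dec', and makeDate
+        ## parses '2011Dec4' against '%Y%b%d'.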
+ + # Get the date of creation from the first chapter + datePublished_text = chapterDates[0] + day, month = datePublished_text.split() + day = re.sub(r"[^\d.]+", '', day) + datePublished = makeDate("2011"+month+day, "%Y%b%d") + self.story.setMetadata("datePublished", datePublished) + dateUpdated_soup = bs.BeautifulSoup(data).find("div", {"class":"calendar"}) + dateUpdated_soup.find('span').extract() + dateUpdated = makeDate("2011"+dateUpdated_soup.text, "%Y%b%d") + self.story.setMetadata("dateUpdated", dateUpdated) + + def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) + soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'}) + if soup == None: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + return utf8FromSoup(soup) + \ No newline at end of file diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py new file mode 100644 index 00000000..155ebf7f --- /dev/null +++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','hp') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.story.addToList("category","Harry Potter") + self.is_adult=False + + # get storyId from url--url validation guarantees query is only psid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
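+        ## e.g. a query of 'psid=1234' splits to storyId '1234', and the
+        ## URL below is rebuilt in that one canonical form.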
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.harrypotterfanfiction.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.harrypotterfanfiction.com','harrypotterfanfiction.com'] + + def getSiteExampleURLs(self): + return "http://www.harrypotterfanfiction.com/viewstory.php?psid=1234 http://harrypotterfanfiction.com/viewstory.php?psid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId'))) + self.story.setMetadata('title',a.string) + ## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995' + if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")): + raise exceptions.AdultCheckRequired(self.url) + + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?showuid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + ## hpcom doesn't give us total words--but it does give + ## us words/chapter. I'd rather add than fetch and + ## parse another page. + words=0 + for tr in soup.find('table',{'class':'text'}).findAll('tr'): + tdstr = tr.findAll('td')[2].string + if tdstr and tdstr.isdigit(): + words+=int(tdstr) + self.story.setMetadata('numWords',str(words)) + + # Find the chapters: + tablelist = soup.find('table',{'class':'text'}) + for chapter in tablelist.findAll('a', href=re.compile(r'\?chapterid=\d+')): + #javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?chapterid=433441&i=1' + # just in case there's tags, like <i> in chapter titles. + chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href']) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php'+chpt)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Finding the metadata is a bit of a pain. Desc is the only thing this color. + desctable= soup.find('table',{'bgcolor':'#f0e8e8'}) + self.story.setMetadata('description',stripHTML(desctable)) + + ## Finding the metadata is a bit of a pain. 
Most of the meta + ## data is in a center.table without a bgcolor. + for center in soup.findAll('center'): + table = center.find('table',{'bgcolor':None}) + if table: + metastr = stripHTML(str(table)).replace('\n',' ').replace('\t',' ') + # Rating: 12+ Story Reviews: 3 + # Chapters: 3 + # Characters: Andromeda, Ted, Bellatrix, R. Lestrange, Lucius, Narcissa, OC + # Genre(s): Fluff, Romance, Young Adult Era: OtherPairings: Other Pairing, Lucius/Narcissa + # Status: Completed + # First Published: 2010.09.02 + # Last Published Chapter: 2010.09.28 + # Last Updated: 2010.09.28 + # Favorite Story Of: 1 users + # Warnings: Scenes of a Mild Sexual Nature + + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + m = re.match(r".*?Rating: (.+?) Story Reviews.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genre\(s\): (.+?) Era.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Warnings: (.+).*?",metastr) + if m: + for w in m.group(1).split(','): + if w != 'Now Warnings': + self.story.addToList('warnings',w) + + m = re.match(r".*?First Published: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y.%m.%d")) + + # Updated can have more than one space after it. <shrug> + m = re.match(r".*?Last Updated: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y.%m.%d")) + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + ## most adapters use BeautifulStoneSoup here, but non-Stone + ## allows nested div tags. + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'fluidtext'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(div) + +def getClass(): + return HarryPotterFanFictionComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py new file mode 100644 index 00000000..62aa50ce --- /dev/null +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class MediaMinerOrgSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','mm') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('storyId',m.group('id')) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId')) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + @staticmethod + def getSiteDomain(): + return 'www.mediaminer.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/fanfic/view_st.php/123456 http://"+self.getSiteDomain()+"/fanfic/view_ch.php/1234123/123444#fic_c" + + def getSiteURLPattern(self): + ## http://www.mediaminer.org/fanfic/view_st.php/76882 + ## http://www.mediaminer.org/fanfic/view_ch.php/167618/594087#fic_c + return re.escape("http://"+self.getSiteDomain())+\ + "/fanfic/view_(st|ch)\.php/"+r"(?P<id>\d+)(/\d+(#fic_c)?)?$" + + def extractChapterUrlsAndMetadata(self): + + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # [ A - All Readers ], strip '[' ']' + ## Above title because we remove the smtxt font to get title. + smtxt = soup.find("font",{"class":"smtxt"}) + if not smtxt: + raise exceptions.StoryDoesNotExist(self.url) + rating = smtxt.string[1:-1] + self.story.setMetadata('rating',rating) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"/fanfic/src.php/u/\d+")) + self.story.setMetadata('authorId',a['href'].split('/')[-1]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + ## Title - Good grief. Title varies by chaptered, 1chapter and 'type=one shot'--and even 'one-shot's can have titled chapter. + ## But, if colspan=2, there's no chapter title. 
+ ## <td class="ffh">Atmosphere: Chapter 1</b> <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td colspan=2 class="ffh">Hearts of Ice <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td colspan=2 class="ffh">Suzaku no Princess <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td class="ffh">The Kraut, The Bartender, and The Drunkard: Chapter 1</b> <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td class="ffh">Betrayal and Justice: A Cold Heart</b> <font size="-1">( Chapter 1 )</font> <font class="smtxt">[ A - All Readers ]</font></td> + ## <td class="ffh">Question and Answer: Question and Answer</b> <font size="-1">( One-Shot )</font> <font class="smtxt">[ A - All Readers ]</font></td> + title = soup.find('td',{'class':'ffh'}) + for font in title.findAll('font'): + font.extract() # removes 'font' tags from inside the td. + if title.has_key('colspan'): + titlet = title.text + else: + ## No colspan, it's part chapter title--even if it's a one-shot. + titlet = ':'.join(title.text.split(':')[:-1]) # strip trailing 'Chapter X' or chapter title + self.story.setMetadata('title',titlet) + ## The story title is difficult to reliably parse from the + ## story pages. Getting it from the author page is, but costs + ## another fetch. + # authsoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + # titlea = authsoup.find('a',{'href':'/fanfic/view_st.php/'+self.story.getMetadata('storyId')}) + # self.story.setMetadata('title',titlea.text) + + # save date from first for later. + firstdate=None + + # Find the chapters + select = soup.find('select',{'name':'cid'}) + if not select: + self.chapterUrls.append(( self.story.getMetadata('title'),self.url)) + else: + for option in select.findAll("option"): + chapter = stripHTML(option.string) + ## chapter can be: Chapter 7 [Jan 23, 2011] + ## or: Vigilant Moonlight ( Chapter 1 ) [Jan 30, 2004] + ## or even: Prologue ( Prologue ) [Jul 31, 2010] + m = re.match(r'^(.*?) (\( .*? \) )?\[(.*?)\]$',chapter) + chapter = m.group(1) + # save date from first for later. + if not firstdate: + firstdate = m.group(3) + self.chapterUrls.append((chapter,'http://'+self.host+'/fanfic/view_ch.php/'+self.story.getMetadata('storyId')+'/'+option['value'])) + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # category + # <a href="/fanfic/src.php/a/567">Ranma 1/2</a> + for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/a/")): + self.story.addToList('category',a.string) + + # genre + # <a href="/fanfic/src.php/a/567">Ranma 1/2</a> + for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")): + self.story.addToList('genre',a.string) + + # if firstdate, then the block below will only have last updated. + if firstdate: + self.story.setMetadata('datePublished', makeDate(firstdate, "%b %d, %Y")) + # Everything else is in <tr bgcolor="#EEEED4"> + + metastr = stripHTML(soup.find("tr",{"bgcolor":"#EEEED4"})).replace('\n',' ').replace('\r',' ').replace('\t',' ') + # Latest Revision: August 03, 2010 + m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d)",metastr) + if m: + self.story.setMetadata('dateUpdated', makeDate(m.group(1), "%B %d, %Y")) + if not firstdate: + self.story.setMetadata('datePublished', + self.story.getMetadataRaw('dateUpdated')) + + else: + self.story.setMetadata('dateUpdated', + self.story.getMetadataRaw('datePublished')) + + # Words: 123456 + m = re.match(r".*?\| Words: (\d+) \|",metastr) + if m: + self.story.setMetadata('numWords', m.group(1)) + + # Summary: .... 
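+        ## Summary runs to the end of metastr, so the (.*)$ group below
+        ## takes everything after the literal 'Summary: '.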
+ m = re.match(r".*?Summary: (.*)$",metastr) + if m: + self.story.setMetadata('description', m.group(1)) + + # completed + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + return + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data=self._fetchUrl(url) + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + anchor = soup.find('a',{'name':'fic_c'}) + + if None == anchor: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + ## find divs with align=left, those are paragraphs in newer stories. + divlist = anchor.findAllNext('div',{'align':'left'}) + if divlist: + for div in divlist: + div.name='p' # convert to <p> mediaminer uses div with + # a margin for paragraphs. + anchor.append(div) # cheat! stuff all the content + # divs into anchor just as a + # holder. + del div['style'] + del div['align'] + anchor.name='div' + return utf8FromSoup(anchor) + + else: + logging.debug('Using kludgey text find for older mediaminer story.') + ## Some older mediaminer stories are unparsable with BeautifulSoup. + ## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first. + ## Story stuff falls between: + data = "<div id='HERE'>" + data[data.find('<a name="fic_c">'):] +"</div>" + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + for tag in soup.findAll('td',{'class':'ffh'}) + \ + soup.findAll('div',{'class':'acl'}) + \ + soup.findAll('div',{'class':'footer smtxt'}) + \ + soup.findAll('table',{'class':'tbbrdr'}): + tag.extract() # remove tag from soup. + + return utf8FromSoup(soup) + + +def getClass(): + return MediaMinerOrgSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py new file mode 100644 index 00000000..6cb4b415 --- /dev/null +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','pns') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. 
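+        ## (e.g. a curly apostrophe is byte 0x92 in windows-1252 but a
+        ## control code in iso-8859-1, which is why 1252 is tried first.)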
+        self.story.addToList("category","Harry Potter")
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.potionsandsnitches.net'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return ['www.potionsandsnitches.net','potionsandsnitches.net']
+
+    def getSiteExampleURLs(self):
+        return "http://www.potionsandsnitches.net/fanfiction/viewstory.php?sid=1234 http://potionsandsnitches.net/fanfiction/viewstory.php?sid=5678"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://")+r"(www\.)?"+re.escape("potionsandsnitches.net/fanfiction/viewstory.php?sid=")+r"\d+$"
+
+    def needToLoginCheck(self, data):
+        if 'Registered Users Only' in data \
+                or 'There is no such account on our website' in data \
+                or "That password doesn't match the one in our database" in data:
+            return True
+        else:
+            return False
+
+    def extractChapterUrlsAndMetadata(self):
+
+        url = self.url+'&index=1'
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/fanfiction/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there are tags, like <i>, in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfiction/'+chapter['href']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## <meta name='description' content='<p>Description</p> ...' >
+        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
+        ## which is escaped HTML.  Unfortunately, we can't use it because they don't
+        ## escape (') chars in the desc, breaking the tag.
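+        ## e.g. (hypothetical): content='<p>It's a trap</p>' -- the bare
+        ## apostrophe in "It's" ends the attribute early and mangles the tag.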
+ #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next div class='listbox' + svalue = "" + while not defaultGetattr(value,'class') == 'listbox': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), "%b %d %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), "%b %d %Y")) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(div) + +def getClass(): + return PotionsAndSnitchesNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py new file mode 100644 index 00000000..6ec8a24b --- /dev/null +++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','thpc') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.dateformat = "%b %d %Y" + + + @staticmethod + def getSiteDomain(): + return 'fanfiction.tenhawkpresents.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + addurl = "&ageconsent=ok&warning=4" + url = self.url+'&index=1'+addurl + logging.debug("Changing URL: "+url) + self.performLogin(url) + data = self._fetchUrl(url) + + if "This story contains mature content which may include violence, sexual situations, and coarse language" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId'))) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TenhawkPresentsComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py new file mode 100644 index 00000000..1debdfc7 --- /dev/null +++ b/fanficdownloader/adapters/adapter_test1.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import datetime +import time +import logging + +import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TestSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','tst1') + self.crazystring = u" crazy tests:[bare amp(&) quote(') amp(&) gt(>) lt(<) ATnT(AT&T) pound(£)]" + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + self.username='' + self.is_adult=False + + @staticmethod + def getSiteDomain(): + return 'test1.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"?sid=1234" + + def getSiteURLPattern(self): + return BaseSiteAdapter.getSiteURLPattern(self)+r'/?\?sid=\d+$' + + def extractChapterUrlsAndMetadata(self): + + if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")): + logging.warn("self.is_adult:%s"%self.is_adult) + raise exceptions.AdultCheckRequired(self.url) + + if self.story.getMetadata('storyId') == '666': + raise exceptions.StoryDoesNotExist(self.url) + + if self.getConfig("username"): + self.username = self.getConfig("username") + + if self.story.getMetadata('storyId') == '668' and self.username != "Me" : + raise exceptions.FailedToLogin(self.url,self.username) + + if self.story.getMetadata('storyId') == '664': + self.story.setMetadata(u'title',"Test Story Title "+self.crazystring) + else: + self.story.setMetadata(u'title',"Test Story Title") + self.story.setMetadata('storyUrl',self.url) + self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done + +Some more longer description. "I suck at summaries!" "Better than it sounds!" 
"My first fic" +''') + self.story.setMetadata('datePublished',makeDate("1972-01-31","%Y-%m-%d")) + self.story.setMetadata('dateCreated',datetime.datetime.now()) + if self.story.getMetadata('storyId') == '669': + self.story.setMetadata('dateUpdated',datetime.datetime.now()) + else: + self.story.setMetadata('dateUpdated',makeDate("1975-01-31","%Y-%m-%d")) + self.story.setMetadata('numWords','123456') + self.story.setMetadata('status','In-Completed') + self.story.setMetadata('rating','Tweenie') + + self.story.setMetadata('author','Test Author aa') + self.story.setMetadata('authorId','98765') + self.story.setMetadata('authorUrl','http://author/url') + + self.story.addToList('warnings','Swearing') + self.story.addToList('warnings','Violence') + + self.story.addToList('category','Harry Potter') + self.story.addToList('category','Furbie') + self.story.addToList('category','Crossover') + + self.story.addToList('genre','Fantasy') + self.story.addToList('genre','SF') + self.story.addToList('genre','Noir') + + self.chapterUrls = [(u'Prologue '+self.crazystring,self.url+"&chapter=1"), + ('Chapter 1, Xenos on Cinnabar',self.url+"&chapter=2"), + ('Chapter 2, Sinmay on Kintikin',self.url+"&chapter=3"), + ('Chapter 3, Over Cinnabar',self.url+"&chapter=4"), + ('Chapter 4',self.url+"&chapter=5"), + ('Chapter 5',self.url+"&chapter=6"), + # ('Chapter 6',self.url+"&chapter=6"), + # ('Chapter 7',self.url+"&chapter=6"), + # ('Chapter 8',self.url+"&chapter=6"), + # ('Chapter 9',self.url+"&chapter=6"), + # ('Chapter 0',self.url+"&chapter=6"), + # ('Chapter a',self.url+"&chapter=6"), + # ('Chapter b',self.url+"&chapter=6"), + # ('Chapter c',self.url+"&chapter=6"), + # ('Chapter d',self.url+"&chapter=6"), + # ('Chapter e',self.url+"&chapter=6"), + # ('Chapter f',self.url+"&chapter=6"), + # ('Chapter g',self.url+"&chapter=6"), + # ('Chapter h',self.url+"&chapter=6"), + # ('Chapter i',self.url+"&chapter=6"), + # ('Chapter j',self.url+"&chapter=6"), + # ('Chapter k',self.url+"&chapter=6"), + # ('Chapter l',self.url+"&chapter=6"), + # ('Chapter m',self.url+"&chapter=6"), + # ('Chapter n',self.url+"&chapter=6"), + ] + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) + if self.story.getMetadata('storyId') == '667': + raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) + + if self.story.getMetadata('storyId') == '670': + time.sleep(2.0) + + if "chapter=1" in url : + text=u''' +<div> +<h3>Prologue</h3> +<p>This is a fake adapter for testing purposes. Different storyId's will give different errors:</p> +<p>http://test1.com?sid=664 - Crazy string title</p> +<p>http://test1.com?sid=665 - raises AdultCheckRequired</p> +<p>http://test1.com?sid=666 - raises StoryDoesNotExist</p> +<p>http://test1.com?sid=667 - raises FailedToDownload on chapter 1</p> +<p>http://test1.com?sid=668 - raises FailedToLogin unless username='Me'</p> +<p>http://test1.com?sid=669 - Succeeds with Updated Date=now</p> +<p>http://test1.com?sid=670 - Succeeds, but sleeps 2sec on each chapter</p> +<p>And other storyId will succeed with the same output.</p> +</div> +''' + else: + text=u''' +<div> +<h3>Chapter</h3> +<p><center>Centered text</center></p> +<p>Lorem '''+self.crazystring+''' <i>italics</i>, <b>bold</b>, <u>underline</u> consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +br breaks<br><br> +br breaks<br><br> +<hr> +horizontal rules +<hr> +<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +</div> +''' + soup = bs.BeautifulStoneSoup(text,selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + return utf8FromSoup(soup) + +def getClass(): + return TestSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py new file mode 100644 index 00000000..5a399a6c --- /dev/null +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twcs') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
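+        # (illustrative: sid 1234 becomes
+        #  http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=1234,
+        #  the canonical form used for all later fetches)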
+ self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.dateformat = "%B %d, %Y" + + + @staticmethod + def getSiteDomain(): + return 'www.thewriterscoffeeshop.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/library/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/library/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/library/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
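+            # (illustrative: stripHTML reduces '<i>Chapter 1</i>' to 'Chapter 1')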
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/library/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TheWritersCoffeeShopComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py new file mode 100644 index 00000000..2a58adfc --- /dev/null +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TwilightedNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','tw') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.story.addToList("category","Twilight") + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.twilighted.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.twilighted.net','twilighted.net'] + + def getSiteExampleURLs(self): + return "http://www.twilighted.net/viewstory.php?sid=1234 http://twilighted.net/viewstory.php?sid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("twilighted.net/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. 
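+        # (illustrative: an author link like <a href="viewuser.php?uid=42">Name</a>
+        #  yields authorId '42' via the split('=') below)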
+ a = soup.find('a', href=re.compile(r"viewuser.php")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + ## twilighted.net doesn't use genre. + # if 'Genre' in label: + # genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + # genrestext = [genre.string for genre in genres] + # self.genre = ', '.join(genrestext) + # for genre in genrestext: + # self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TwilightedNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py new file mode 100644 index 00000000..5fe9c3da --- /dev/null +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +from fanficdownloader.htmlcleanup import stripHTML +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TwiwriteNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twrt') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.story.addToList("category","Twilight") + self.username = "NoneGiven" # if left empty, twiwrite.net doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.twiwrite.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.twiwrite.net','twiwrite.net'] + + def getSiteExampleURLs(self): + return "http://www.twiwrite.net/viewstory.php?sid=1234 http://twiwrite.net/viewstory.php?sid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("twiwrite.net/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. 
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like <i> in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## <meta name='description' content='<p>Description</p> ...' >
+        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
+        ## which is escaped HTML. Unfortunately, we can't use it because they don't
+        ## escape (') chars in the desc, breaking the tag.
+        #meta_desc = soup.find('meta',{'name':'description'})
+        #metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
+        #self.story.setMetadata('description',stripHTML(metasoup))
+
+        def defaultGetattr(d,k):
+            try:
+                return d[k]
+            except:
+                return ""
+
+        # <span class="label">Rated:</span> NC-17<br /> etc
+        labels = soup.findAll('span',{'class':'label'})
+        for labelspan in labels:
+            value = labelspan.nextSibling
+            label = labelspan.string
+
+            if 'Summary' in label:
+                ## Everything until the next span class='label'
+                svalue = ""
+                while not defaultGetattr(value,'class') == 'label':
+                    svalue += str(value)
+                    value = value.nextSibling
+                self.story.setMetadata('description',stripHTML(svalue))
+
+            if 'Rated' in label:
+                self.story.setMetadata('rating', value)
+
+            if 'Word count' in label:
+                self.story.setMetadata('numWords', value)
+
+            if 'Categories' in label:
+                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                catstext = [cat.string for cat in cats]
+                for cat in catstext:
+                    self.story.addToList('category',cat.string)
+
+            if 'Genre' in label:
+                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3'))
+                genrestext = [genre.string for genre in genres]
+                self.genre = ', '.join(genrestext)
+                for genre in genrestext:
+                    self.story.addToList('genre',genre.string)
+
+            if 'Warnings' in label:
+                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=8'))
+                warningstext = [warning.string for warning in warnings]
+                self.warning = ', '.join(warningstext)
+                for warning in warningstext:
+                    self.story.addToList('warning',warning.string)
+
+            if 'Completed' in label:
+                if 'Yes' in value:
+                    self.story.setMetadata('status', 'Completed')
+                else:
+                    self.story.setMetadata('status', 'In-Progress')
+
+            if 'Published' in label:
+                self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y"))
+
+            if 'Updated' in label:
+                # there's a stray [ at the end.
+                value = value[0:-1]
+                self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y"))
+
+
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        span = soup.find('div', {'id' : 'story'})
+
+        if None == span:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s!
Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TwiwriteNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py new file mode 100644 index 00000000..fdc3fb21 --- /dev/null +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +import fanficdownloader.BeautifulSoup as bs +import fanficdownloader.exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class WhoficComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','whof') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + @staticmethod + def getSiteDomain(): + return 'www.whofic.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+"\d+$" + + def extractChapterUrlsAndMetadata(self): + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + # fetch the first chapter. From that we will: + # - determine title, authorname, authorid + # - get chapter list, if not one-shot. + + url = self.url+'&chapter=1' + logging.debug("URL: "+url) + + # use BeautifulSoup HTML parser to make everything easier to find. + try: + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # pull title(title) and author from the HTML title. + title = soup.find('title').string + logging.debug('Title: %s' % title) + title = title.split('::')[1].strip() + self.story.setMetadata('title',title.split(' by ')[0].strip()) + self.story.setMetadata('author',title.split(' by ')[1].strip()) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + + # Find the chapter selector + select = soup.find('select', { 'name' : 'chapter' } ) + + if select is None: + # no selector found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = self.url + "&chapter=%s" % o['value'] + # just in case there's tags, like <i> in chapter titles. 
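+                # (illustrative: <option value="3">3. <i>Title</i></option>
+                #  stringifies below, and the re.sub strips the tags,
+                #  leaving just '3. Title')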
+                title = "%s" % o
+                title = re.sub(r'<[^>]+>','',title)
+                self.chapterUrls.append((title,url))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## Whofic.com puts none of the other meta data in the chapters
+        ## or even the story chapter index page. Need to scrape the
+        ## author page to find it.
+
+        # <table width="100%" bordercolor="#333399" border="0" cellspacing="0" cellpadding="2"><tr><td>
+        # <b><a href="viewstory.php?sid=38220">Accompaniment 2</a></b> by <a href="viewuser.php?uid=12412">clandestinemiscreant</a> [<a href="reviews.php?sid=38220">Reviews</a> - <a href="reviews.php?sid=38220">0</a>] <br>
+        # This is a series of short stories written as an accompaniment to Season 2, Season 28 for us oldies, and each is unrelated except for that one factor. Each story is canon, in that it does not change established events at time of airing, based on things mentioned and/or implied and missing or deleted scenes that were not seen in the final aired episodes.<br>
+        # <font size="-1"><b><a href="categories.php?catid=15">Tenth Doctor</a></b> - All Ages - None - Humor, Hurt/Comfort, Romance<br>
+        # <i>Characters:</i> Rose Tyler<br>
+        # <i>Series:</i> None<br>
+        # <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font>
+        # </td></tr></table>
+
+        logging.debug("Author URL: "+self.story.getMetadata('authorUrl'))
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')),
+                                     selfClosingTags=('br',)) # normalize <br> tags to <br />
+        # find this story in the list, parse its metadata based on
+        # lots of assumptions about the html, since there's little
+        # tagging.
+        # Found a story once that had the story URL in the desc for a
+        # series on the same author's page. Now using the reviews
+        # link instead to find the appropriate metadata.
+        a = soup.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId')))
+        metadata = a.findParent('td')
+        metadatachunks = utf8FromSoup(metadata).split('<br />')
+        # process metadata for this story.
+        self.story.setMetadata('description', metadatachunks[1])
+
+        # First line of the stuff with ' - ' separators
+        moremeta = metadatachunks[2]
+        moremeta = re.sub(r'<[^>]+>','',moremeta) # strip tags.
+
+        moremetaparts = moremeta.split(' - ')
+
+        # first part is category--whofic.com has categories
+        # Doctor One-11, Torchwood, etc. We're going to
+        # prepend any with 'Doctor' or 'Era' (Multi-Era, Other
+        # Era) as 'Doctor Who'.
+        #
+        # Also push each in as 'extra tags'.
+        category = moremetaparts[0]
+        if 'Doctor' in category or 'Era' in category:
+            self.story.addToList('category','Doctor Who')
+
+        for cat in category.split(', '):
+            self.story.addToList('category',cat)
+
+        # next in that line is age rating.
+        self.story.setMetadata('rating',moremetaparts[1])
+
+        # after that is a possible list of specific warnings,
+        # Explicit Violence, Swearing, etc
+        if "None" not in moremetaparts[2]:
+            for warn in moremetaparts[2].split(', '):
+                self.story.addToList('warnings',warn)
+
+        # then genre. It's another comma list. All together
+        # in genre, plus each in extra tags.
+        genre=moremetaparts[3]
+        for g in genre.split(', '):
+            self.story.addToList('genre',g)
+
+        # the next line is stuff with ' - ' separators *and* names--with tags.
+        moremeta = metadatachunks[5]
+        moremeta = re.sub(r'<[^>]+>','',moremeta) # strip tags.
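+        # (per the sample HTML above, this chunk reads like
+        #  "Published: 2010.08.15 - Updated: 2010.08.16 - Chapters: 4 - ..."
+        #  once tags are stripped, so each ' - ' part splits on ': ')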
+ + moremetaparts = moremeta.split(' - ') + + for part in moremetaparts: + (name,value) = part.split(': ') + name=name.strip() + value=value.strip() + if name == 'Published': + self.story.setMetadata('datePublished', makeDate(value, '%Y.%m.%d')) + if name == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, '%Y.%m.%d')) + if name == 'Completed': + if value == 'Yes': + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + if name == 'Word Count': + self.story.setMetadata('numWords', value) + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + + # hardly a great identifier, I know, but whofic really doesn't + # give us anything better to work with. + span = soup.find('span', {'style' : 'font-size: 100%;'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return WhoficComSiteAdapter + diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py new file mode 100644 index 00000000..cd76abcc --- /dev/null +++ b/fanficdownloader/adapters/base_adapter.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import datetime +import time +import logging +import urllib +import urllib2 as u2 +import urlparse as up + +try: + from google.appengine.api import apiproxy_stub_map + def urlfetch_timeout_hook(service, call, request, response): + if call != 'Fetch': + return + # Make the default deadline 10 seconds instead of 5. 
+ if not request.has_deadline(): + request.set_deadline(10.0) + + apiproxy_stub_map.apiproxy.GetPreCallHooks().Append( + 'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch') + logging.info("Hook to make default deadline 10.0 installed.") +except: + pass + #logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine") + +from fanficdownloader.story import Story +from fanficdownloader.configurable import Configurable +from fanficdownloader.htmlcleanup import removeEntities, removeAllEntities, stripHTML +from fanficdownloader.exceptions import InvalidStoryURL + +try: + import fanficdownloader.chardet as chardet +except ImportError: + chardet = None + +class BaseSiteAdapter(Configurable): + + @classmethod + def matchesSite(cls,site): + return site in cls.getAcceptDomains() + + @classmethod + def getAcceptDomains(cls): + return [cls.getSiteDomain()] + + def validateURL(self): + return re.match(self.getSiteURLPattern(), self.url) + + def __init__(self, config, url): + Configurable.__init__(self, config) + self.addConfigSection(self.getSiteDomain()) + self.addConfigSection("overrides") + + self.opener = u2.build_opener(u2.HTTPCookieProcessor()) + self.storyDone = False + self.metadataDone = False + self.story = Story() + self.story.setMetadata('site',self.getSiteDomain()) + self.story.setMetadata('dateCreated',datetime.datetime.now()) + self.chapterUrls = [] # tuples of (chapter title,chapter url) + self.chapterFirst = None + self.chapterLast = None + ## order of preference for decoding. + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of + # iso-8859-1. Most sites that + # claim to be iso-8859-1 (and + # some that claim to be utf8) + # are really windows-1252. + self._setURL(url) + if not self.validateURL(): + raise InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + def _setURL(self,url): + self.url = url + self.parsedUrl = up.urlparse(url) + self.host = self.parsedUrl.netloc + self.path = self.parsedUrl.path + self.story.setMetadata('storyUrl',self.url) + +## website encoding(s)--in theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8". The special value 'auto' +## will call chardet and use the encoding it reports if it has +90% +## confidence. 'auto' is not reliable. + def _decode(self,data): + if self.getConfig('website_encodings'): + decode = self.getConfigList('website_encodings') + else: + decode = self.decode + + for code in decode: + try: + #print code + if code == "auto": + if not chardet: + logging.info("chardet not available, skipping 'auto' encoding") + continue + detected = chardet.detect(data) + #print detected + if detected['confidence'] > 0.9: + code=detected['encoding'] + else: + continue + return data.decode(code) + except: + logging.debug("code failed:"+code) + pass + logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode) + return "".join([x for x in data if ord(x) < 128]) + + # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s + def _postUrl(self, url, parameters={}, headers={}): + if self.getConfig('slow_down_sleep_time'): + time.sleep(float(self.getConfig('slow_down_sleep_time'))) + + ## u2.Request assumes POST when data!=None. Also assumes data + ## is application/x-www-form-urlencoded. 
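+        ## (illustrative: urllib.urlencode({'penname':'Me','password':'pw'})
+        ##  yields e.g. 'penname=Me&password=pw', the body format named above)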
+        if 'Content-type' not in headers:
+            headers['Content-type']='application/x-www-form-urlencoded'
+        if 'Accept' not in headers:
+            headers['Accept']="text/html,*/*"
+        req = u2.Request(url,
+                         data=urllib.urlencode(parameters),
+                         headers=headers)
+        return self._decode(self.opener.open(req).read())
+
+    # parameters is a dict()
+    def _fetchUrl(self, url, parameters=None):
+        if self.getConfig('slow_down_sleep_time'):
+            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+
+        excpt=None
+        for sleeptime in [0, 0.5, 4, 9]:
+            time.sleep(sleeptime)
+            try:
+                if parameters:
+                    return self._decode(self.opener.open(url,urllib.urlencode(parameters)).read())
+                else:
+                    return self._decode(self.opener.open(url).read())
+            except Exception, e:
+                excpt=e
+                logging.warn("Caught an exception reading URL: %s Exception %s."%(unicode(url),unicode(e)))
+
+        logging.error("Giving up on %s" %url)
+        logging.exception(excpt)
+        raise(excpt)
+
+    # Limit chapters to download. Input starts at 1, list starts at 0
+    def setChaptersRange(self,first=None,last=None):
+        if first:
+            self.chapterFirst=int(first)-1
+        if last:
+            self.chapterLast=int(last)-1
+
+    # Does the download the first time it's called.
+    def getStory(self):
+        if not self.storyDone:
+            self.getStoryMetadataOnly()
+            for index, (title,url) in enumerate(self.chapterUrls):
+                if (self.chapterFirst!=None and index < self.chapterFirst) or \
+                        (self.chapterLast!=None and index > self.chapterLast):
+                    self.story.addChapter(removeEntities(title),
+                                          None)
+                else:
+                    self.story.addChapter(removeEntities(title),
+                                          removeEntities(self.getChapterText(url)))
+            self.storyDone = True
+        return self.story
+
+    def getStoryMetadataOnly(self):
+        if not self.metadataDone:
+            self.extractChapterUrlsAndMetadata()
+            self.metadataDone = True
+        return self.story
+
+    ###############################
+
+    @staticmethod
+    def getSiteDomain():
+        "Needs to be overridden in each adapter class."
+        return 'no such domain'
+
+    ## URL pattern validation is done *after* picking an adapter based
+    ## on domain instead of *as* the adapter selector so we can offer
+    ## the user example(s) for that particular site.
+    ## Override validateURL(self) instead if you need more control.
+    def getSiteURLPattern(self):
+        "Used to validate URL. Should be overridden in each adapter class."
+        return '^http://'+re.escape(self.getSiteDomain())
+
+    def getSiteExampleURLs(self):
+        """
+        Needs to be overridden in each adapter class. It's the adapter
+        writer's responsibility to make sure the example(s) pass the
+        URL validation.
+        """
+        return 'no such example'
+
+    def extractChapterUrlsAndMetadata(self):
+        "Needs to be overridden in each adapter class. Populates self.story metadata and self.chapterUrls"
+        pass
+
+    def getChapterText(self, url):
+        "Needs to be overridden in each adapter class."
+        pass
+
+def makeDate(string,format):
+    return datetime.datetime.strptime(string,format)
+
+acceptable_attributes = ['href','name']
+
+# this gives us a unicode object, not just a string containing bytes.
+# (I gave soup a unicode string, you'd think it could give it back...)
+def utf8FromSoup(soup):
+    for t in soup.findAll(recursive=True):
+        for attr in t._getAttrMap().keys():
+            if attr not in acceptable_attributes:
+                del t[attr] ## strip all tag attributes except href and name
+        # these are not acceptable strict XHTML.
But we do already have + # CSS classes of the same names defined in constants.py + if t.name in ('u'): + t['class']=t.name + t.name='span' + if t.name in ('center'): + t['class']=t.name + t.name='div' + # removes paired, but empty tags. + if t.string != None and len(t.string.strip()) == 0 : + t.extract() + return soup.__str__('utf8').decode('utf-8') diff --git a/fanficdownloader/chardet/__init__.py b/fanficdownloader/chardet/__init__.py new file mode 100644 index 00000000..953b3994 --- /dev/null +++ b/fanficdownloader/chardet/__init__.py @@ -0,0 +1,26 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +__version__ = "2.0.1" + +def detect(aBuf): + import universaldetector + u = universaldetector.UniversalDetector() + u.reset() + u.feed(aBuf) + u.close() + return u.result diff --git a/fanficdownloader/chardet/big5freq.py b/fanficdownloader/chardet/big5freq.py new file mode 100644 index 00000000..c1b0f3ce --- /dev/null +++ b/fanficdownloader/chardet/big5freq.py @@ -0,0 +1,923 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Big5 frequency table +# by Taiwan's Mandarin Promotion Council +# <http://www.edu.tw:81/mandr/> +# +# 128 --> 0.42261 +# 256 --> 0.57851 +# 512 --> 0.74851 +# 1024 --> 0.89384 +# 2048 --> 0.97583 +# +# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98 +# Random Distribution Ration = 512/(5401-512)=0.105 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR + +BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 + +#Char to FreqOrder table +BIG5_TABLE_SIZE = 5376 + +Big5CharToFreqOrder = ( \ + 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 +3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 +1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 + 63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64 +3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80 +4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96 +5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112 + 630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128 + 179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144 + 995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160 +2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176 +1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192 +3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208 + 706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240 +3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256 +2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272 + 437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288 +3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304 +1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320 +5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336 + 266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352 +5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368 +1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384 + 32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400 + 188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416 +3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432 +3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448 + 324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464 +2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480 +2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496 + 314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 
757,1162,1879,1616,3459, # 512 + 287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528 +3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544 +1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560 +1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576 +1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592 +2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608 + 265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624 +4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640 +1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656 +5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672 +2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688 + 383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704 + 98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720 + 523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736 + 710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752 +5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768 + 379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784 +1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800 + 585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816 + 690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832 +5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848 +1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864 + 544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880 +3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896 +4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912 +3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928 + 279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944 + 610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960 +1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976 +4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992 +3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008 +3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024 +2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040 +5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056 +3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072 +5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088 +1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104 +2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120 +1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136 + 78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152 
+1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168 +4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184 +3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200 + 534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216 + 165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232 + 626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248 +2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264 +5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280 +1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296 +2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312 +1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328 +1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344 +5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360 +5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376 +5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392 +3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408 +4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424 +4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440 +2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456 +5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472 +3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488 + 598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504 +5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520 +5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536 +1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552 +2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568 +3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584 +4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600 +5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616 +3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632 +4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648 +1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664 +1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680 +4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696 +1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712 + 240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728 +1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744 +1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760 +3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776 + 619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792 
+5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808 +2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824 +1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840 +1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856 +5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872 + 829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888 +4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904 + 375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920 +2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936 + 444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952 +1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968 +1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984 + 730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000 +4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016 +4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032 +1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048 +3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064 +5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080 +5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096 +1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112 +2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128 +1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144 +3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160 +2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176 +3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192 +2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208 +4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224 +4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240 +3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256 + 97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272 +3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288 + 424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304 +3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320 +4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336 +3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352 +1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368 +5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384 + 199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400 +5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416 +1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432 + 
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448 +4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464 +4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480 + 397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496 +2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512 +2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528 +3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544 +1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560 +4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576 +2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592 +1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608 +1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624 +2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640 +3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656 +1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672 +5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688 +1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704 +4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720 +1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736 + 135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752 +1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768 +4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784 +4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800 +2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816 +1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832 +4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848 + 660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864 +5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880 +2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896 +3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912 +4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928 + 790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944 +5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960 +5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976 +1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992 +4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008 +4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024 +2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040 +3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056 +3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072 
+2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088 +1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104 +4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120 +3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136 +3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152 +2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168 +4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184 +5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200 +3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216 +2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232 +3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248 +1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264 +2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280 +3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296 +4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312 +2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328 +2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344 +5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360 +1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376 +2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392 +1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408 +3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424 +4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440 +2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456 +3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472 +3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488 +2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504 +4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520 +2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536 +3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552 +4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568 +5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584 +3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600 + 194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616 +1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632 +4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648 +1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664 +4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680 +5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696 + 510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712 
+5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728 +5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744 +2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760 +3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776 +2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792 +2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808 + 681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824 +1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840 +4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856 +3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872 +3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888 + 838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904 +2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920 + 625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936 +2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952 +4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968 +1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984 +4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000 +1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016 +3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032 + 574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048 +3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064 +5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080 +5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096 +3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112 +3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128 +1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144 +2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160 +5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176 +1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192 +1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208 +3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224 + 919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240 +1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256 +4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272 +5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288 +2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304 +3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320 + 516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336 +1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 
4352 +2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368 +2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384 +5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400 +5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416 +5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432 +2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448 +2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464 +1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480 +4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496 +3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512 +3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528 +4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544 +4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560 +2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576 +2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592 +5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608 +4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624 +5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640 +4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656 + 502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672 + 121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688 +1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704 +3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720 +4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736 +1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752 +5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768 +2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784 +2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800 +3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816 +5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832 +1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848 +3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864 +5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880 +1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896 +5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912 +2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928 +3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944 +2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960 +3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976 +3932,1988, 618, 
427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
+3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
+4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
+ 803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
+2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
+4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
+3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
+5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
+1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
+5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
+ 425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
+1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
+ 479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
+4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
+1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
+4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
+1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
+ 433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
+3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
+4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
+5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
+ 938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
+3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
+ 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
+2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512
+#Everything below is of no interest for detection purposes
+2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392
+2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408
+5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424
+5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440
+5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456
+5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472
+5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488
+5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504
+5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520
+5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536
+5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552
+5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568
+5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584
+5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600 
+6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616 +6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632 +6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648 +6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664 +6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680 +6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696 +6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712 +6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728 +6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744 +6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760 +6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776 +6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792 +6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808 +6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824 +6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840 +6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856 +6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872 +6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888 +6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904 +6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920 +6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936 +6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952 +6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968 +6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984 +6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000 +6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016 +6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032 +6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048 +6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064 +6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080 +6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096 +6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112 +6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128 +6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144 +6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160 +6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176 +6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192 +6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208 +6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224 +6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 
6240 +6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256 +3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272 +6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288 +6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304 +3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320 +6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336 +6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352 +6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368 +6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384 +6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400 +6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416 +6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432 +4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448 +6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464 +6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480 +3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496 +6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512 +6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528 +6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544 +6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560 +6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576 +6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592 +6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608 +6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624 +6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640 +6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656 +6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672 +7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688 +7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704 +7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720 +7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736 +7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752 +7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768 +7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784 +7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800 +7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816 +7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832 +7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848 +7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864 
+7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880 +7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896 +7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912 +7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928 +7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944 +7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960 +7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976 +7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992 +7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008 +7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024 +7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040 +7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056 +7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072 +7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088 +7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104 +7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120 +7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136 +7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152 +7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168 +7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184 +7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200 +7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216 +7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232 +7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248 +7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264 +7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280 +7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296 +7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312 +7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328 +7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344 +7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360 +7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376 +7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392 +7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408 +7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424 +7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440 +3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456 +7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472 +7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488 +7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 
7504 +7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520 +4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536 +7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552 +7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568 +7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584 +7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600 +7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616 +7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632 +7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648 +7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664 +7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680 +7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696 +7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712 +8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728 +8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744 +8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760 +8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776 +8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792 +8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808 +8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824 +8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840 +8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856 +8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872 +8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888 +8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904 +8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920 +8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936 +8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952 +8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968 +8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984 +8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000 +8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016 +8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032 +8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048 +8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064 +8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080 +8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096 +8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112 +8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128 
+8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144 +8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160 +8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176 +8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192 +8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208 +8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224 +8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240 +8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256 +8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272 +8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288 +8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304 +8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320 +8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336 +8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352 +8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368 +8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384 +8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400 +8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416 +8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432 +8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448 +8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464 +8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480 +8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496 +8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512 +8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528 +8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544 +8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560 +8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576 +8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592 +8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608 +8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624 +8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640 +8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656 +8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672 +8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688 +4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704 +8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720 +8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736 +8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752 +8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 
8768 +9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784 +9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800 +9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816 +9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832 +9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848 +9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864 +9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880 +9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896 +9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912 +9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928 +9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944 +9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960 +9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976 +9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992 +9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008 +9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024 +9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040 +9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056 +9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072 +9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088 +9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104 +9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120 +9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136 +9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152 +9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168 +9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184 +9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200 +9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216 +9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232 +9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248 +9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264 +9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280 +9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296 +9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312 +9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328 +9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344 +9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360 +9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376 +3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392 
+9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408 +9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424 +9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440 +4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456 +9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472 +9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488 +9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504 +9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520 +9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536 +9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552 +9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568 +9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584 +9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600 +9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616 +9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632 +9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648 +9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664 +9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680 +9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696 +9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712 +9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728 +9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744 +9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760 +9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776 +9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792 +9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808 +9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824 +10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840 +10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856 +10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872 +10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888 +10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904 +10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920 +10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936 +10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952 +10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968 +4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984 +10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000 
+10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016 +10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032 +10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048 +10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064 +10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080 +10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096 +10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112 +4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128 +10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144 +10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160 +10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176 +10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192 +10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208 +10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224 +10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240 +10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256 +10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272 +10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288 +10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304 +10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320 +10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336 +10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352 +10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368 +10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384 +10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400 +4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416 +10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432 +10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448 +10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464 +10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480 +10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496 +10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512 +10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528 +10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544 
+10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560 +10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576 +10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592 +10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608 +10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624 +10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640 +10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656 +10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672 +10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688 +10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704 +10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720 +10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736 +10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752 +10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768 +10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784 +10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800 +10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816 +10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832 +10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848 +10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864 +10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880 +10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896 +11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912 +11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928 +11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944 +4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960 +11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976 +11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992 +11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008 +11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024 +11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040 +11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056 +11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072 +11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088 
+11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104 +11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120 +11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136 +11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152 +11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168 +11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184 +11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200 +11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216 +11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232 +11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248 +11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264 +11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280 +11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296 +11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312 +11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328 +11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344 +11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360 +11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376 +11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392 +11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408 +11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424 +11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440 +11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456 +11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472 +4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488 +11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504 +11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520 +11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536 +11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552 +11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568 +11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584 +11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600 +11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616 +11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632 
+11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648 +11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664 +11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680 +11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696 +11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712 +11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728 +11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744 +11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760 +11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776 +11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792 +11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808 +11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824 +11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840 +11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856 +11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872 +11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888 +11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904 +11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920 +11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936 +12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952 +12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968 +12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984 +12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000 +12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016 +12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032 +12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048 +12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064 +12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080 +12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096 +12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112 +12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128 +12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144 +12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160 +12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176 
+4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192 +4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208 +4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224 +12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240 +12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256 +12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272 +12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288 +12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304 +12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320 +12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336 +12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352 +12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368 +12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384 +12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400 +12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416 +12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432 +12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448 +12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464 +12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480 +12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496 +12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512 +12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528 +12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544 +12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560 +12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576 +12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592 +12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608 +12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624 +12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640 +12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656 +12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672 +12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688 +12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704 +12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720 
+12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736 +12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752 +12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768 +12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784 +12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800 +12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816 +12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832 +12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848 +12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864 +12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880 +12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896 +12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912 +12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928 +12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944 +12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960 +12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976 +4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992 +13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008 +13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024 +13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040 +13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056 +13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072 +13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088 +13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104 +4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120 +13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136 +13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152 +13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168 +13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184 +13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200 +13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216 +13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232 +13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248 +13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264 
+13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280 +13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296 +13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312 +13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328 +13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344 +13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360 +5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376 +13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392 +13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408 +13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424 +13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440 +13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456 +13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472 +13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488 +13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504 +13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520 +13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536 +13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552 +13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568 +13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584 +13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600 +13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616 +13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632 +13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648 +13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664 +13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680 +13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696 +13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712 +13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728 +13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744 +13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760 +13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776 +13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792 +13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, 
#13808
+13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824
+13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840
+13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856
+13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872
+13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888
+13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904
+13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920
+13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936
+13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
+13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
+13968,13969,13970,13971,13972) #13973
diff --git a/fanficdownloader/chardet/big5prober.py b/fanficdownloader/chardet/big5prober.py
new file mode 100644
index 00000000..e6b52aad
--- /dev/null
+++ b/fanficdownloader/chardet/big5prober.py
@@ -0,0 +1,41 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from mbcharsetprober import MultiByteCharSetProber
+from codingstatemachine import CodingStateMachine
+from chardistribution import Big5DistributionAnalysis
+from mbcssm import Big5SMModel
+
+class Big5Prober(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(Big5SMModel)
+        self._mDistributionAnalyzer = Big5DistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "Big5"
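+
+# Illustrative usage sketch (not part of the upstream chardet sources): a
+# prober is fed raw bytes and then queried for a verdict.  Assuming `data`
+# is a Python 2 byte string suspected to be Big5:
+#
+#     import constants
+#     prober = Big5Prober()
+#     prober.feed(data)
+#     if prober.get_state() != constants.eNotMe:
+#         print prober.get_charset_name(), prober.get_confidence()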
diff --git a/fanficdownloader/chardet/chardistribution.py b/fanficdownloader/chardet/chardistribution.py
new file mode 100644
index 00000000..b8933418
--- /dev/null
+++ b/fanficdownloader/chardet/chardistribution.py
@@ -0,0 +1,200 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants
+from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO
+from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO
+from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO
+from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO
+from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO
+
+ENOUGH_DATA_THRESHOLD = 1024
+SURE_YES = 0.99
+SURE_NO = 0.01
+
+class CharDistributionAnalysis:
+    def __init__(self):
+        # Mapping table to get a character's frequency order from its char
+        # order (see get_order())
+        self._mCharToFreqOrder = None
+        # Size of the above table
+        self._mTableSize = None
+        # This constant varies from language to language and is used in
+        # calculating confidence.  See
+        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
+        # for further detail.
+        self._mTypicalDistributionRatio = None
+        self.reset()
+
+    def reset(self):
+        """reset analyser, clear any state"""
+        # If this flag is set to constants.True, detection is done and a
+        # conclusion has been made
+        self._mDone = constants.False
+        self._mTotalChars = 0 # Total characters encountered
+        self._mFreqChars = 0 # Characters whose frequency order is less than 512
+
+    def feed(self, aStr, aCharLen):
+        """feed a character with known length"""
+        if aCharLen == 2:
+            # we only care about 2-byte characters in our distribution analysis
+            order = self.get_order(aStr)
+        else:
+            order = -1
+        if order >= 0:
+            self._mTotalChars += 1
+            # order is valid
+            if order < self._mTableSize:
+                if 512 > self._mCharToFreqOrder[order]:
+                    self._mFreqChars += 1
+
+    def get_confidence(self):
+        """return confidence based on existing data"""
+        # if we didn't receive any characters in our consideration range,
+        # return a negative answer
+        if self._mTotalChars <= 0:
+            return SURE_NO
+
+        if self._mTotalChars != self._mFreqChars:
+            r = self._mFreqChars / ((self._mTotalChars - self._mFreqChars) * self._mTypicalDistributionRatio)
+            if r < SURE_YES:
+                return r
+
+        # cap the confidence; we never want to claim 100% certainty
+        return SURE_YES
+
+    def got_enough_data(self):
+        # We do not need to see all of the data to draw a conclusion; for
+        # charset detection, a certain amount is enough
+        return self._mTotalChars > ENOUGH_DATA_THRESHOLD
+
+    def get_order(self, aStr):
+        # We do not handle characters based on the original encoded string but
+        #   convert it to a number, here called order.
+        #   This allows multiple encodings of a language to share one frequency table.
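+        #
+        # Worked example (illustrative): in the EUC-KR subclass below the
+        # second byte of a two-byte character ranges over 0xa1 -- 0xfe, i.e.
+        # 94 values, so the byte pair (0xB0, 0xA1) gets
+        # order = 94 * (0xB0 - 0xB0) + (0xA1 - 0xA1) = 0, the first entry of
+        # EUCKRCharToFreqOrder.  The base class itself knows no encoding and
+        # simply returns -1, meaning "not a character we track".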
+        return -1
+
+class EUCTWDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = EUCTWCharToFreqOrder
+        self._mTableSize = EUCTW_TABLE_SIZE
+        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for euc-TW encoding, we are interested in the following byte ranges:
+        #   first  byte range: 0xc4 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here; the state machine has already done that
+        if aStr[0] >= '\xC4':
+            return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
+
+class EUCKRDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = EUCKRCharToFreqOrder
+        self._mTableSize = EUCKR_TABLE_SIZE
+        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for euc-KR encoding, we are interested in the following byte ranges:
+        #   first  byte range: 0xb0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here; the state machine has already done that
+        if aStr[0] >= '\xB0':
+            return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
+
+class GB2312DistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = GB2312CharToFreqOrder
+        self._mTableSize = GB2312_TABLE_SIZE
+        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for GB2312 encoding, we are interested in the following byte ranges:
+        #   first  byte range: 0xb0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here; the state machine has already done that
+        if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):
+            return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
+
+class Big5DistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = Big5CharToFreqOrder
+        self._mTableSize = BIG5_TABLE_SIZE
+        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for big5 encoding, we are interested in the following byte ranges:
+        #   first  byte range: 0xa4 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
+        # no validation needed here; the state machine has already done that
+        if aStr[0] >= '\xA4':
+            if aStr[1] >= '\xA1':
+                return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63
+            else:
+                return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40
+        else:
+            return -1
+
+class SJISDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for sjis encoding, we are interested in the following byte ranges:
+        #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
+        # no validation needed here; the state machine has already done that
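+        #
+        # Worked example (illustrative): every lead byte opens a row of 188
+        # cells, so the byte pair (0x88, 0x41) gets
+        # order = 188 * (0x88 - 0x81) + (0x41 - 0x40) = 1317.  Pairs whose
+        # second byte is above 0x7F end up with order -1 below, i.e. they are
+        # not scored.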
+        if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'):
+            order = 188 * (ord(aStr[0]) - 0x81)
+        elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'):
+            order = 188 * (ord(aStr[0]) - 0xE0 + 31)
+        else:
+            return -1
+        order = order + ord(aStr[1]) - 0x40
+        if aStr[1] > '\x7F':
+            order = -1  # second bytes above 0x7F are not scored
+        return order
+
+class EUCJPDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for euc-JP encoding, we are interested in the following byte ranges:
+        #   first  byte range: 0xa0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here; the state machine has already done that
+        if aStr[0] >= '\xA0':
+            return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
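+
+# Illustrative sketch (not part of the upstream sources): driving one of the
+# analysers above directly, assuming a Python 2 byte string `buf` already
+# split into complete two-byte EUC-KR characters:
+#
+#     analyser = EUCKRDistributionAnalysis()
+#     for i in xrange(0, len(buf) - 1, 2):
+#         analyser.feed(buf[i:i+2], 2)
+#     if analyser.got_enough_data():
+#         print analyser.get_confidence()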
diff --git a/fanficdownloader/chardet/charsetgroupprober.py b/fanficdownloader/chardet/charsetgroupprober.py
new file mode 100644
index 00000000..51880694
--- /dev/null
+++ b/fanficdownloader/chardet/charsetgroupprober.py
@@ -0,0 +1,96 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from charsetprober import CharSetProber
+
+class CharSetGroupProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mActiveNum = 0
+        self._mProbers = []
+        self._mBestGuessProber = None
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mActiveNum = 0
+        for prober in self._mProbers:
+            if prober:
+                prober.reset()
+                prober.active = constants.True
+                self._mActiveNum += 1
+        self._mBestGuessProber = None
+
+    def get_charset_name(self):
+        if not self._mBestGuessProber:
+            self.get_confidence()
+            if not self._mBestGuessProber: return None
+        return self._mBestGuessProber.get_charset_name()
+
+    def feed(self, aBuf):
+        for prober in self._mProbers:
+            if not prober: continue
+            if not prober.active: continue
+            st = prober.feed(aBuf)
+            if not st: continue
+            if st == constants.eFoundIt:
+                self._mBestGuessProber = prober
+                return self.get_state()
+            elif st == constants.eNotMe:
+                prober.active = constants.False
+                self._mActiveNum -= 1
+                if self._mActiveNum <= 0:
+                    self._mState = constants.eNotMe
+                    return self.get_state()
+        return self.get_state()
+
+    def get_confidence(self):
+        st = self.get_state()
+        if st == constants.eFoundIt:
+            return 0.99
+        elif st == constants.eNotMe:
+            return 0.01
+        bestConf = 0.0
+        self._mBestGuessProber = None
+        for prober in self._mProbers:
+            if not prober: continue
+            if not prober.active:
+                if constants._debug:
+                    sys.stderr.write(prober.get_charset_name() + ' not active\n')
+                continue
+            cf = prober.get_confidence()
+            if constants._debug:
+                sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
+            if bestConf < cf:
+                bestConf = cf
+                self._mBestGuessProber = prober
+        if not self._mBestGuessProber: return 0.0
+        return bestConf
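+
+# Illustrative sketch (not upstream code): a group prober multiplexes several
+# concrete probers and reports the best surviving candidate.  Hypothetical
+# wiring shown here; the real subclasses fill _mProbers themselves:
+#
+#     group = CharSetGroupProber()
+#     group._mProbers = [Big5Prober(), EUCJPProber()]
+#     group.reset()
+#     group.feed(data)
+#     print group.get_charset_name(), group.get_confidence()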
diff --git a/fanficdownloader/chardet/charsetprober.py b/fanficdownloader/chardet/charsetprober.py
new file mode 100644
index 00000000..3ac1683c
--- /dev/null
+++ b/fanficdownloader/chardet/charsetprober.py
@@ -0,0 +1,60 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, re
+
+class CharSetProber:
+    def __init__(self):
+        pass
+
+    def reset(self):
+        self._mState = constants.eDetecting
+
+    def get_charset_name(self):
+        return None
+
+    def feed(self, aBuf):
+        pass
+
+    def get_state(self):
+        return self._mState
+
+    def get_confidence(self):
+        return 0.0
+
+    def filter_high_bit_only(self, aBuf):
+        # collapse every run of pure-ASCII bytes to a single space
+        aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
+        return aBuf
+
+    def filter_without_english_letters(self, aBuf):
+        # collapse every run of English letters to a single space
+        aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
+        return aBuf
+
+    def filter_with_english_letters(self, aBuf):
+        # TODO
+        return aBuf
diff --git a/fanficdownloader/chardet/codingstatemachine.py b/fanficdownloader/chardet/codingstatemachine.py
new file mode 100644
index 00000000..452d3b0a
--- /dev/null
+++ b/fanficdownloader/chardet/codingstatemachine.py
@@ -0,0 +1,56 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from constants import eStart, eError, eItsMe
+
+class CodingStateMachine:
+    def __init__(self, sm):
+        self._mModel = sm
+        self._mCurrentBytePos = 0
+        self._mCurrentCharLen = 0
+        self.reset()
+
+    def reset(self):
+        self._mCurrentState = eStart
+
+    def next_state(self, c):
+        # for each byte we get its class; if it is the first byte of a
+        # character, we also get the character's byte length
+        byteCls = self._mModel['classTable'][ord(c)]
+        if self._mCurrentState == eStart:
+            self._mCurrentBytePos = 0
+            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
+        # from the byte's class and the state table, we get the next state
+        self._mCurrentState = self._mModel['stateTable'][self._mCurrentState * self._mModel['classFactor'] + byteCls]
+        self._mCurrentBytePos += 1
+        return self._mCurrentState
+
+    def get_current_charlen(self):
+        return self._mCurrentCharLen
+
+    def get_coding_state_machine(self):
+        return self._mModel['name']
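+
+# Illustrative sketch (not upstream code): stepping a machine byte by byte
+# over the escape sequence ESC $ B, which opens a JIS X 0208 run in
+# ISO-2022-JP.  Assuming the models defined in escsm.py:
+#
+#     from escsm import ISO2022JPSMModel
+#     sm = CodingStateMachine(ISO2022JPSMModel)
+#     for c in '\x1b$B':
+#         state = sm.next_state(c)   # intermediate states, then eItsMe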
diff --git a/fanficdownloader/chardet/constants.py b/fanficdownloader/chardet/constants.py
new file mode 100644
index 00000000..e94e226b
--- /dev/null
+++ b/fanficdownloader/chardet/constants.py
@@ -0,0 +1,47 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+_debug = 0
+
+# prober states (see charsetprober.py)
+eDetecting = 0
+eFoundIt = 1
+eNotMe = 2
+
+# coding state machine states (see codingstatemachine.py)
+eStart = 0
+eError = 1
+eItsMe = 2
+
+SHORTCUT_THRESHOLD = 0.95
+
+# the True/False built-ins only exist on newer Pythons; fall back to ints
+import __builtin__
+if not hasattr(__builtin__, 'False'):
+    False = 0
+    True = 1
+else:
+    False = __builtin__.False
+    True = __builtin__.True
diff --git a/fanficdownloader/chardet/escprober.py b/fanficdownloader/chardet/escprober.py
new file mode 100644
index 00000000..572ed7be
--- /dev/null
+++ b/fanficdownloader/chardet/escprober.py
@@ -0,0 +1,79 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel +from charsetprober import CharSetProber +from codingstatemachine import CodingStateMachine + +class EscCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = [ \ + CodingStateMachine(HZSMModel), + CodingStateMachine(ISO2022CNSMModel), + CodingStateMachine(ISO2022JPSMModel), + CodingStateMachine(ISO2022KRSMModel) + ] + self.reset() + + def reset(self): + CharSetProber.reset(self) + for codingSM in self._mCodingSM: + if not codingSM: continue + codingSM.active = constants.True + codingSM.reset() + self._mActiveSM = len(self._mCodingSM) + self._mDetectedCharset = None + + def get_charset_name(self): + return self._mDetectedCharset + + def get_confidence(self): + if self._mDetectedCharset: + return 0.99 + else: + return 0.00 + + def feed(self, aBuf): + for c in aBuf: + for codingSM in self._mCodingSM: + if not codingSM: continue + if not codingSM.active: continue + codingState = codingSM.next_state(c) + if codingState == constants.eError: + codingSM.active = constants.False + self._mActiveSM -= 1 + if self._mActiveSM <= 0: + self._mState = constants.eNotMe + return self.get_state() + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + self._mDetectedCharset = codingSM.get_coding_state_machine() + return self.get_state() + + return self.get_state() diff --git a/fanficdownloader/chardet/escsm.py b/fanficdownloader/chardet/escsm.py new file mode 100644 index 00000000..9fa22952 --- /dev/null +++ b/fanficdownloader/chardet/escsm.py @@ -0,0 +1,240 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from constants import eStart, eError, eItsMe + +HZ_cls = ( \ +1,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,0,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,4,0,5,2,0, # 78 - 7f +1,1,1,1,1,1,1,1, # 80 - 87 +1,1,1,1,1,1,1,1, # 88 - 8f +1,1,1,1,1,1,1,1, # 90 - 97 +1,1,1,1,1,1,1,1, # 98 - 9f +1,1,1,1,1,1,1,1, # a0 - a7 +1,1,1,1,1,1,1,1, # a8 - af +1,1,1,1,1,1,1,1, # b0 - b7 +1,1,1,1,1,1,1,1, # b8 - bf +1,1,1,1,1,1,1,1, # c0 - c7 +1,1,1,1,1,1,1,1, # c8 - cf +1,1,1,1,1,1,1,1, # d0 - d7 +1,1,1,1,1,1,1,1, # d8 - df +1,1,1,1,1,1,1,1, # e0 - e7 +1,1,1,1,1,1,1,1, # e8 - ef +1,1,1,1,1,1,1,1, # f0 - f7 +1,1,1,1,1,1,1,1, # f8 - ff +) + +HZ_st = ( \ +eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 + 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f + 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 + 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +) + +HZCharLenTable = (0, 0, 0, 0, 0, 0) + +HZSMModel = {'classTable': HZ_cls, + 'classFactor': 6, + 'stateTable': HZ_st, + 'charLenTable': HZCharLenTable, + 'name': "HZ-GB-2312"} + +ISO2022CN_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,3,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,4,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022CN_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 + 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 +eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +) + +ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CNSMModel = {'classTable': ISO2022CN_cls, + 'classFactor': 9, + 'stateTable': ISO2022CN_st, + 'charLenTable': ISO2022CNCharLenTable, + 'name': "ISO-2022-CN"} + +ISO2022JP_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,2,2, # 08 - 0f 
+0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,7,0,0,0, # 20 - 27 +3,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +6,0,4,0,8,0,0,0, # 40 - 47 +0,9,5,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022JP_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f +eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 +eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f +eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f +eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +) + +ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JPSMModel = {'classTable': ISO2022JP_cls, + 'classFactor': 10, + 'stateTable': ISO2022JP_st, + 'charLenTable': ISO2022JPCharLenTable, + 'name': "ISO-2022-JP"} + +ISO2022KR_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,3,0,0,0, # 20 - 27 +0,4,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,5,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022KR_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 +eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f +eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +) + +ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) + +ISO2022KRSMModel = {'classTable': ISO2022KR_cls, + 'classFactor': 6, + 'stateTable': ISO2022KR_st, + 'charLenTable': ISO2022KRCharLenTable, + 'name': "ISO-2022-KR"} diff --git a/fanficdownloader/chardet/eucjpprober.py b/fanficdownloader/chardet/eucjpprober.py new file mode 100644 index 00000000..46a8b38b --- /dev/null +++ b/fanficdownloader/chardet/eucjpprober.py @@ -0,0 +1,85 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. 
+# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from constants import eStart, eError, eItsMe +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCJPDistributionAnalysis +from jpcntx import EUCJPContextAnalysis +from mbcssm import EUCJPSMModel + +class EUCJPProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCJPSMModel) + self._mDistributionAnalyzer = EUCJPDistributionAnalysis() + self._mContextAnalyzer = EUCJPContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "EUC-JP" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n') + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar, charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i-1:i+1], charLen) + self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if self._mContextAnalyzer.got_enough_data() and \ + (self.get_confidence() > constants.SHORTCUT_THRESHOLD): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/fanficdownloader/chardet/euckrfreq.py b/fanficdownloader/chardet/euckrfreq.py new file mode 100644 index 00000000..1463fa1d --- /dev/null +++ b/fanficdownloader/chardet/euckrfreq.py @@ -0,0 +1,594 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Sampling from about 20M text materials include literature and computer technology + +# 128 --> 0.79 +# 256 --> 0.92 +# 512 --> 0.986 +# 1024 --> 0.99944 +# 2048 --> 0.99999 +# +# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24 +# Random Distribution Ration = 512 / (2350-512) = 0.279. +# +# Typical Distribution Ratio + +EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 + +EUCKR_TABLE_SIZE = 2352 + +# Char to FreqOrder table , +EUCKRCharToFreqOrder = ( \ + 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, +1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, +1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734, + 945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739, + 116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622, + 708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750, +1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856, + 344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205, + 709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779, +1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19, +1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567, +1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797, +1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802, +1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899, + 885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818, +1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409, +1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697, +1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770, +1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723, + 544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416, +1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300, + 119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083, + 893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857, +1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871, + 282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420, +1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885, + 127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889, + 0, 886,1274, 122, 575, 260, 
908, 938,1890,1275, 410, 316,1891,1892, 100,1893, +1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317, +1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841, +1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910, +1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610, + 269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375, +1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939, + 887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870, + 217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934, +1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888, +1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950, +1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065, +1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002, +1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965, +1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467, + 50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285, + 639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7, + 103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979, +1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985, + 818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994, +1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250, + 423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824, + 532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003, +2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745, + 619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61, + 191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023, +2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032, +2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912, +2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224, + 719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012, + 819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050, +2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681, + 499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414, +1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068, +2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075, +1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850, +2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606, +2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449, +1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452, + 949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112, +2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121, +2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130, + 22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274, + 962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139, 
+2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721, +1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298, +2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463, +2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747, +2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285, +2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187, +2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10, +2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350, +1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201, +2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972, +2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219, +2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233, +2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242, +2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247, +1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178, +1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255, +2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259, +1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262, +2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702, +1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273, + 295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541, +2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117, + 432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187, +2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800, + 808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312, +2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229, +2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315, + 501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484, +2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170, +1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335, + 425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601, +1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395, +2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354, +1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476, +2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035, + 416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498, +2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310, +1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389, +2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504, +1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505, +2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145, +1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624, + 593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700, +2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 
608,2426,2427,2428,2429, 221, +2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377, + 644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448, + 915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485, +1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705, +1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465, + 291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471, +2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997, +2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486, + 797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494, + 434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771, + 585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323, +2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491, + 95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510, + 161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519, +2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532, +2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199, + 704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544, +2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247, +1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441, + 249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562, +2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362, +2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583, +2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465, + 3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431, + 202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151, + 974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596, +2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406, +2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611, +2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619, +1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, +2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, + 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 +#Everything below is of no interest for detection purpose +2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, +2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674, +2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690, +2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704, +2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720, +2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734, +2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750, +2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765, +2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779, +2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793, 
+2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809, +2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824, +2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840, +2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856, +1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869, +2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883, +2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899, +2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915, +2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331, +2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945, +2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961, +2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976, +2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992, +2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008, +3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021, +3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037, +3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052, +3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066, +3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080, +3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095, +3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110, +3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124, +3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140, +3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156, +3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172, +3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187, +3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201, +3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217, +3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233, +3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248, +3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264, +3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279, +3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295, +3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311, +3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327, +3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343, +3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359, +3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374, +3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389, +3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405, +3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338, +3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432, +3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446, 
+3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191, +3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471, +3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486, +1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499, +1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513, +3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525, +3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541, +3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557, +3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573, +3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587, +3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603, +3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618, +3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632, +3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648, +3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663, +3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679, +3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695, +3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583, +1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722, +3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738, +3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753, +3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767, +3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782, +3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796, +3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810, +3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591, +1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836, +3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851, +3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866, +3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880, +3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895, +1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905, +3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921, +3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934, +3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603, +3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964, +3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978, +3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993, +3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009, +4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024, +4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040, +1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055, +4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069, 
+4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083, +4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098, +4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113, +4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610, +4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142, +4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157, +4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173, +4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189, +4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205, +4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220, +4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234, +4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249, +4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265, +4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279, +4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294, +4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310, +4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326, +4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341, +4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357, +4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371, +4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387, +4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403, +4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418, +4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432, +4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446, +4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461, +4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476, +4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491, +4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507, +4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623, +4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536, +4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551, +4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567, +4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581, +4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627, +4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611, +4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626, +4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642, +4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657, +4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672, +4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687, +1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700, +4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715, 
+4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731, +4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633, +4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758, +4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773, +4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788, +4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803, +4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817, +4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832, +4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847, +4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863, +4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879, +4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893, +4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909, +4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923, +4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938, +4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954, +4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970, +4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645, +4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999, +5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078, +5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028, +1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042, +5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056, +5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072, +5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087, +5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103, +5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118, +1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132, +5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148, +5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161, +5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177, +5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192, +5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206, +1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218, +5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234, +5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249, +5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262, +5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278, +5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293, +5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308, +5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323, +5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338, +5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353, 
+5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369, +5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385, +5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400, +5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415, +5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430, +5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445, +5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461, +5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477, +5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491, +5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507, +5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523, +5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539, +5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554, +5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570, +1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585, +5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600, +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615, +5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631, +5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646, +5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660, +1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673, +5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688, +5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703, +5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716, +5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729, +5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744, +1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758, +5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773, +1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786, +5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801, +5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815, +5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831, +5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847, +5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862, +5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876, +5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889, +5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905, +5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687, +5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951, +5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963, +5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979, +5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993, 
+5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009, +6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025, +6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039, +6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055, +6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071, +6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086, +6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102, +6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118, +6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133, +6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147, +6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163, +6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179, +6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194, +6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210, +6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225, +6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241, +6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256, +6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024 +6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287, +6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699, +6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317, +6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333, +6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347, +6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363, +6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379, +6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395, +6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411, +6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425, +6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440, +6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456, +6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472, +6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488, +6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266, +6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519, +6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535, +6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551, +1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565, +6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581, +6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597, +6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613, +6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629, +6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644, +1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659, 
+6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674, +1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689, +6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705, +6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721, +6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736, +1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748, +6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763, +6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779, +6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794, +6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711, +6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825, +6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840, +6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856, +6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872, +6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888, +6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903, +6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918, +6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934, +6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950, +6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966, +6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981, +6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996, +6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011, +7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027, +7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042, +7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058, +7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074, +7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090, +7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106, +7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122, +7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138, +7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154, +7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170, +7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186, +7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202, +7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216, +7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232, +7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248, +7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264, +7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280, +7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296, +7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312, +7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327, 
+7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343, +7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359, +7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375, +7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391, +7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407, +7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423, +7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439, +7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455, +7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471, +7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487, +7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503, +7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519, +7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535, +7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551, +7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, +7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583, +7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599, +7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615, +7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631, +7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647, +7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663, +7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679, +7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695, +7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711, +7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727, +7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743, +7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759, +7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775, +7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791, +7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807, +7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823, +7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839, +7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855, +7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871, +7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887, +7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903, +7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919, +7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, +7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, +7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, +7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, +7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, +8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, 
+8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, +8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, +8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, +8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, +8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, +8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, +8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, +8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, +8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, +8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, +8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, +8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, +8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, +8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, +8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, +8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, +8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287, +8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303, +8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319, +8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335, +8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351, +8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367, +8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383, +8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399, +8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415, +8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431, +8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447, +8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463, +8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479, +8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495, +8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511, +8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527, +8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543, +8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559, +8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575, +8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591, +8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607, +8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623, +8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639, +8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655, +8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671, +8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687, +8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, 
+8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, +8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, +8736,8737,8738,8739,8740,8741) diff --git a/fanficdownloader/chardet/euckrprober.py b/fanficdownloader/chardet/euckrprober.py new file mode 100644 index 00000000..bd697ebf --- /dev/null +++ b/fanficdownloader/chardet/euckrprober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCKRDistributionAnalysis +from mbcssm import EUCKRSMModel + +class EUCKRProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCKRSMModel) + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-KR" diff --git a/fanficdownloader/chardet/euctwfreq.py b/fanficdownloader/chardet/euctwfreq.py new file mode 100644 index 00000000..c0572095 --- /dev/null +++ b/fanficdownloader/chardet/euctwfreq.py @@ -0,0 +1,426 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# EUCTW frequency table
+# Converted from big5 work
+# by Taiwan's Mandarin Promotion Council
+# <http://www.edu.tw:81/mandr/>
+
+# 128  --> 0.42261
+# 256  --> 0.57851
+# 512  --> 0.74851
+# 1024 --> 0.89384
+# 2048 --> 0.97583
+# (i.e. the N most frequent characters cover this fraction of typical text)
+#
+# Ideal Distribution Ratio = 0.74851/(1-0.74851) = 2.98
+# Random Distribution Ratio = 512/(5401-512) = 0.105
+#
+# Typical Distribution Ratio is about 25% of the ideal one, still much higher than the RDR
+
+EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
+
+# Char to FreqOrder table
+EUCTW_TABLE_SIZE = 8102
+
+EUCTWCharToFreqOrder = ( \
+   1,1800,1506, 255,1431, 198,   9,  82,   6,7310, 177, 202,3615,1256,2808, 110, # 2742
+3735,  33,3241, 261,  76,  44,2113,  16,2931,2184,1176, 659,3868,  26,3404,2643, # 2758
+1198,3869,3313,4060, 410,2211, 302, 590, 361,1963,   8, 204,  58,4296,7311,1931, # 2774
+  63,7312,7313, 317,1614,  75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
+3616,   3,  10,3870,1471,  29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
+4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932,  34,3501,3173,  64, 604, # 2822
+7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337,  72, 406,7319,  80, # 2838
+ 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449,  69,2969, 591, # 2854
+ 179,2095, 471, 115,2034,1843,  60,  50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
+ 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
+2495,  90,2707,1338, 663,  11, 906,1099,2545,  20,2436, 182, 532,1716,7321, 732, # 2902
+1376,4062,1311,1420,3175,  25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
+3243, 475,1447,3617,7322, 117,  21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
+ 706, 456, 150, 613,4299,  71,1118,2036,4064, 145,3069,  85, 835, 486,2114,1246, # 2950
+1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
+3503,3110,7325,1955,1153,4065,  83, 296,1199,3070, 192, 624,  93,7326, 822,1897, # 2982
+2810,3111, 795,2064, 991,1554,1542,1592,  27,  43,2853, 859, 139,1456, 860,4300, # 2998
+ 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
+3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
+1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
+7328,7329,2173,3176,3619,2673, 593, 845,1062,3244,  88,1723,2037,3875,1950, 212, # 3062
+ 266, 152, 149, 468,1898,4066,4302,  77, 187,7330,3018,  37,   5,2972,7331,3876, # 3078
+7332,7333,  39,2517,4303,2894,3177,2078,  55, 148,  74,4304, 545, 483,1474,1029, # 3094
+1665, 217,1869,1531,3113,1104,2645,4067,  24, 172,3507, 900,3877,3508,3509,4305, # 3110
+  32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674,   4,3019,3314,1427,1788, # 3126
+ 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
+3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439,  38,7339,1063,7340, 794, # 3158
+3879,1435,2296,  46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804,  35, 707, # 3174
+ 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
+2128,1363,3623,1423, 697, 100,3071,  48,  70,1231, 495,3114,2193,7345,1294,7346, # 3206
+2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
+
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 + 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 +3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 +1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 +1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 +1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 +2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 + 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 +4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 +1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 +7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 +2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 + 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 + 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 + 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 + 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 +7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 + 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 +1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 + 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 + 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 +7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 +1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 + 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 +3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 +4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 +3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 + 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 + 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 +1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 +4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 +3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 +3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 +2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 +7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 +3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 +7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 +1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 +2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 +1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 + 
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 +1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 +4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 +3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 + 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 + 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 + 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 +2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 +7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 +1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 +2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 +1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 +1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 +7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 +7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 +7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 +3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 +4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 +1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 +7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 +2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 +7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 +3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 +3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 +7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 +2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 +7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 + 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 +4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 +2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 +7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 +3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 +2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 +2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 + 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 +2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 +1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 +1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 +2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 +1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 
+7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 +7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 +2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 +4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 +1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 +7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 + 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 +4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 + 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 +2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 + 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 +1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 +1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 + 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 +3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 +3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 +1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 +3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 +7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 +7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 +1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 +2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 +1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 +3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 +2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 +3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 +2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 +4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 +4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 +3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 + 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 +3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 + 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 +3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 +3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 +3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 +1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 +7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 + 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 +7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 
+1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 + 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 +4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 +3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 + 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 +2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 +2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 +3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 +1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 +4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 +2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 +1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 +1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 +2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 +3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 +1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 +7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 +1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 +4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 +1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 + 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 +1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 +3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 +3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 +2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 +1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 +4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 + 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 +7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 +2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 +3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 +4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 + 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 +7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 +7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 +1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 +4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 +3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 +2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 +3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 
+3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 +2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 +1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 +4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 +3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 +3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 +2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 +4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 +7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 +3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 +2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 +3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 +1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 +2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 +3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 +4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 +2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 +2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 +7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 +1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 +2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 +1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 +3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 +4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 +2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 +3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 +3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 +2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 +4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 +2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 +3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 +4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 +7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 +3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 + 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 +1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 +4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 +1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 +4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 +7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 + 
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 +7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 +2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 +1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 +1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 +3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 + 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 + 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 + 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 +3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 +2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 + 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 +7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 +1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 +3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 +7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 +1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 +7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 +4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 +1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 +2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 +2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 +4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 + 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 + 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 +3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 +3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 +1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 +2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 +7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 +1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 +1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 +3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 + 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 +1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 +4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 +7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 +2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 +3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 + 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 
7062 +1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 +2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 +2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 +7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 +7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 +7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 +2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 +2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 +1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 +4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 +3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 +3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 +4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 +4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 +2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 +2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 +7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 +4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 +7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 +2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 +1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 +3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 +4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 +2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 + 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 +2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 +1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 +2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 +2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 +4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 +7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 +1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 +3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 +7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 +1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 +8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 +2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 +8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 +2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 +2328,3852, 533,4273,3605,2181, 
617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 +8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 +8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 +8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 + 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 +8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 +4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 +3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 +8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 +1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 +8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 + 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 +1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 + 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 +4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 +1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 +4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 +1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 + 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 +3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 +4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 +8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 + 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 +3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 + 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 +2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 +#Everything below is of no interest for detection purpose +2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118 +2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134 +8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150 +8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166 +8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182 +8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198 +8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214 +8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230 +8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246 +8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262 +8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278 +8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294 +8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310 
+8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326 +8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342 +8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358 +8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374 +8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390 +8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406 +8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422 +8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438 +8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454 +8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470 +8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486 +8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502 +8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518 +8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534 +8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550 +8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566 +8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582 +8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598 +8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614 +8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630 +8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646 +8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662 +8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678 +8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694 +8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710 +8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726 +8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742 diff --git a/fanficdownloader/chardet/euctwprober.py b/fanficdownloader/chardet/euctwprober.py new file mode 100644 index 00000000..b073f134 --- /dev/null +++ b/fanficdownloader/chardet/euctwprober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCTWDistributionAnalysis +from mbcssm import EUCTWSMModel + +class EUCTWProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCTWSMModel) + self._mDistributionAnalyzer = EUCTWDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-TW" diff --git a/fanficdownloader/chardet/gb2312freq.py b/fanficdownloader/chardet/gb2312freq.py new file mode 100644 index 00000000..7a4d5a1b --- /dev/null +++ b/fanficdownloader/chardet/gb2312freq.py @@ -0,0 +1,471 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# GB2312 most frequently used character table
+#
+# Char to FreqOrder table, from hz6763
+
+# 512 --> 0.79 -- 0.79
+# 1024 --> 0.92 -- 0.13
+# 2048 --> 0.98 -- 0.06
+# 6768 --> 1.00 -- 0.02
+#
+# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
+# Random Distribution Ratio = 512 / (3755 - 512) = 0.157
+#
+# Typical Distribution Ratio is about 25% of the Ideal one, still much higher than RDR
+
+GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
+
+GB2312_TABLE_SIZE = 3760
+
+GB2312CharToFreqOrder = ( \
+1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
+2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
+2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
+ 249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
+1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
+1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
+ 152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
+1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
+2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
+3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
+ 544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
+1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
+ 927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
+2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
+ 360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
+2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
+1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
+3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
+ 198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
+1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
+ 253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
+2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
+1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
+3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
+1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
+2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
+1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
+ 585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
+3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
+3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
+ 252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
+3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
+ 836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
+1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 
110,4549,2066, 648, +3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992, +2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233, +1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157, + 755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807, +1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094, +4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258, + 887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478, +3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152, +3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909, + 509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272, +1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221, +2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252, +1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301, +1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254, + 389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070, +3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461, +3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360, +4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124, + 296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535, +3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243, +1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713, +1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071, +4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442, + 215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946, + 814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257, +3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180, +1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427, + 602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781, +1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724, +2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937, + 930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943, + 432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789, + 396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552, +3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246, +4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451, +3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310, + 750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860, +2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297, +2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780, +2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745, + 776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936, +2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032, + 968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657, + 163,2167, 290,1209,1622,3378, 
550, 634,2508,2510, 695,2634,2384,2512,1476,1414, + 220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976, +3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436, +2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254, +2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536, +1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238, + 18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059, +2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741, + 90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447, + 286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601, +1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269, +1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894, + 915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173, + 681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994, +1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956, +2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437, +3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154, +2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240, +2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143, +2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634, +3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472, +1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541, +1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143, +2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312, +1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414, +3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754, +1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424, +1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302, +3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739, + 795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004, +2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484, +1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739, +4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535, +1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641, +1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307, +3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573, +1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533, + 47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965, + 504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99, +1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280, + 160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505, +1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012, +1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039, + 744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 
892,2481,1623,4077, 982, +3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530, +4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392, +3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656, +2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220, +2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766, +1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535, +3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728, +2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338, +1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627, +1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885, + 125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411, +2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671, +2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162, +3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774, +4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524, +3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346, + 180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040, +3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188, +2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280, +1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131, + 259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947, + 774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970, +3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814, +4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557, +2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997, +1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972, +1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369, + 766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376, +1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480, +3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610, + 955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128, + 642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769, +1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207, + 57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392, +1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623, + 193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782, +2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650, + 158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478, +2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773, +2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007, +1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323, +1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598, +2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961, + 819,1541, 142,2284, 
44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302, +1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409, +1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683, +2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191, +2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616, +3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302, +1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774, +4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147, + 571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731, + 845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464, +3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377, +1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315, + 470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557, +3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903, +1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060, +4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261, +1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092, +2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810, +1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708, + 498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658, +1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871, +3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503, + 448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229, +2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112, + 136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504, +1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389, +1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27, +1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542, +3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861, +2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845, +3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700, +3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469, +3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582, + 996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999, +2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274, + 786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020, +2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601, + 12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628, +1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31, + 475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668, + 233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778, +1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169, +3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667, +3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 
63,2076, 314,1881, +1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276, +1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320, +3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751, +2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432, +2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772, +1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843, +3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116, + 451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904, +4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652, +1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664, +2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770, +3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283, +3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626, +1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713, + 768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333, + 391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062, +2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555, + 931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014, +1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510, + 386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015, +1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459, +1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390, +1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238, +1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, +1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, + 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, + 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512 +#Everything below is of no interest for detection purpose +5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636, +5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874, +5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278, +3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806, +4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827, +5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512, +5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578, +4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828, +4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105, +4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189, +4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561, +3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226, +6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778, +4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039, +6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404, 
+4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213, +4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739, +4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328, +5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592, +3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424, +4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270, +3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232, +4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456, +4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121, +6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971, +6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409, +5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519, +4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367, +6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834, +4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460, +5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464, +5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709, +5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906, +6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530, +3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262, +6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920, +4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190, +5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318, +6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538, +6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697, +4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544, +5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016, +4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638, +5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006, +5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071, +4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552, +4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556, +5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432, +4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632, +4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885, +5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336, +4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729, +4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854, +4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332, +5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004, +5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419, +4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293, +3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580, 
+4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339, +6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341, +5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493, +5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046, +4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904, +6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728, +5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350, +6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233, +4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944, +5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413, +5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700, +3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999, +5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694, +6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571, +4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359, +6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178, +4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421, +4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330, +6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855, +3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587, +6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803, +4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791, +3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304, +3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445, +3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506, +4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856, +2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057, +5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777, +4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369, +5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028, +5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914, +5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175, +4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681, +5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534, +4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912, +5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054, +1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336, +3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666, +4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375, +4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113, +6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614, +4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173, +5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197, 
+3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271, +5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423, +5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529, +5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921, +3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837, +5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922, +5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187, +3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382, +5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628, +5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683, +5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053, +6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928, +4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662, +6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663, +4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554, +3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191, +4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013, +5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932, +5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055, +5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829, +3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096, +3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660, +6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199, +6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748, +5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402, +6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957, +6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668, +6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763, +6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407, +6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051, +5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429, +6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791, +6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028, +3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305, +3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159, +4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683, +4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372, +3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514, +5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544, +5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472, +5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716, +5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905, +5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327, 
+4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030, +5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281, +6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224, +5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327, +4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062, +4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354, +6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065, +3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953, +4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681, +4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708, +5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442, +6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387, +6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237, +4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713, +6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547, +5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957, +5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337, +5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074, +5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685, +5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455, +4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722, +5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615, +5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093, +5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989, +5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094, +6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212, +4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967, +5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733, +4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260, +4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864, +6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353, +4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095, +6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287, +3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504, +5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539, +6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750, +6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864, +6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213, +5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573, +6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252, +6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970, +3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703, +5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978, 
+4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767) + diff --git a/fanficdownloader/chardet/gb2312prober.py b/fanficdownloader/chardet/gb2312prober.py new file mode 100644 index 00000000..91eb3925 --- /dev/null +++ b/fanficdownloader/chardet/gb2312prober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import GB2312DistributionAnalysis +from mbcssm import GB2312SMModel + +class GB2312Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(GB2312SMModel) + self._mDistributionAnalyzer = GB2312DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "GB2312" diff --git a/fanficdownloader/chardet/hebrewprober.py b/fanficdownloader/chardet/hebrewprober.py new file mode 100644 index 00000000..a2b1eaa9 --- /dev/null +++ b/fanficdownloader/chardet/hebrewprober.py @@ -0,0 +1,269 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetprober import CharSetProber +import constants + +# This prober doesn't actually recognize a language or a charset. 
+# It is a helper prober used by the Hebrew model probers
+
+### General ideas of the Hebrew charset recognition ###
+#
+# Four main charsets exist in Hebrew:
+# "ISO-8859-8" - Visual Hebrew
+# "windows-1255" - Logical Hebrew
+# "ISO-8859-8-I" - Logical Hebrew
+# "x-mac-hebrew" - ?? Logical Hebrew ??
+#
+# Both "ISO" charsets use a completely identical set of code points, whereas
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
+# these code points. windows-1255 defines additional characters in the range
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
+# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
+# x-mac-hebrew defines similar additional code points but with a different
+# mapping.
+#
+# As far as an average Hebrew text with no diacritics is concerned, all four
+# charsets are identical with respect to code points, meaning that for the
+# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
+# (including final letters).
+#
+# The dominant difference between these charsets is their directionality.
+# "Visual" directionality means that the text is ordered as if the renderer is
+# not aware of a BIDI rendering algorithm. The renderer sees the text and
+# draws it from left to right. The text itself when ordered naturally is read
+# backwards. A buffer of Visual Hebrew generally looks like so:
+# "[last word of first line spelled backwards] [whole line ordered backwards
+# and spelled backwards] [first word of first line spelled backwards]
+# [end of line] [last word of second line] ... etc' "
+# Adding punctuation marks, numbers and English text to visual text is
+# naturally also "visual", running from left to right.
+#
+# "Logical" directionality means the text is ordered "naturally" according to
+# the order it is read. It is the responsibility of the renderer to display
+# the text from right to left. A BIDI algorithm is used to place general
+# punctuation marks, numbers and English text in the text.
+#
+# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
+# what little evidence I could find, it seems that its general directionality
+# is Logical.
+#
+# To sum up all of the above, the Hebrew probing mechanism knows about two
+# charsets:
+# Visual Hebrew - "ISO-8859-8" - backwards text - words and sentences are
+#    backwards while line order is natural. For charset recognition purposes
+#    the line order is unimportant (in fact, for this implementation, even
+#    word order is unimportant).
+# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
+#
+# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
+#    specifically identified.
+# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
+#    that contains special punctuation marks or diacritics is displayed with
+#    some unconverted characters showing as question marks. This problem might
+#    be corrected using another model prober for x-mac-hebrew. Because
+#    x-mac-hebrew texts are so rare, though, writing another model prober
+#    isn't worth the effort and performance hit.
+#
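+# To make the directionality difference concrete, here is a small sketch
+# using windows-1255 byte values (the same values listed as constants
+# further down in this file): the word Shin-Lamed-Vav-final-Mem, read
+# "shalom", is stored in Logical order as '\xf9\xec\xe5\xed', while a
+# Visual buffer stores it byte-reversed as '\xed\xe5\xec\xf9', so the
+# final Mem ('\xed') ends up at the start of the stored word rather than
+# at its end.
+#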
The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. +# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +# windows-1255 / ISO-8859-8 code points of interest +FINAL_KAF = '\xea' +NORMAL_KAF = '\xeb' +FINAL_MEM = '\xed' +NORMAL_MEM = '\xee' +FINAL_NUN = '\xef' +NORMAL_NUN = '\xf0' +FINAL_PE = '\xf3' +NORMAL_PE = '\xf4' +FINAL_TSADI = '\xf5' +NORMAL_TSADI = '\xf6' + +# Minimum Visual vs Logical final letter score difference. +# If the difference is below this, don't rely solely on the final letter score distance. +MIN_FINAL_CHAR_DISTANCE = 5 + +# Minimum Visual vs Logical model score difference. +# If the difference is below this, don't rely at all on the model score distance. +MIN_MODEL_DISTANCE = 0.01 + +VISUAL_HEBREW_NAME = "ISO-8859-8" +LOGICAL_HEBREW_NAME = "windows-1255" + +class HebrewProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mLogicalProber = None + self._mVisualProber = None + self.reset() + + def reset(self): + self._mFinalCharLogicalScore = 0 + self._mFinalCharVisualScore = 0 + # The two last characters seen in the previous buffer, + # mPrev and mBeforePrev are initialized to space in order to simulate a word + # delimiter at the beginning of the data + self._mPrev = ' ' + self._mBeforePrev = ' ' + # These probers are owned by the group prober. + + def set_model_probers(self, logicalProber, visualProber): + self._mLogicalProber = logicalProber + self._mVisualProber = visualProber + + def is_final(self, c): + return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI] + + def is_non_final(self, c): + # The normal Tsadi is not a good Non-Final letter due to words like + # 'lechotet' (to chat) containing an apostrophe after the tsadi. 
This + # apostrophe is converted to a space in FilterWithoutEnglishLetters causing + # the Non-Final tsadi to appear at an end of a word even though this is not + # the case in the original text. + # The letters Pe and Kaf rarely display a related behavior of not being a + # good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for + # example legally end with a Non-Final Pe or Kaf. However, the benefit of + # these letters as Non-Final letters outweighs the damage since these words + # are quite rare. + return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] + + def feed(self, aBuf): + # Final letter analysis for logical-visual decision. + # Look for evidence that the received buffer is either logical Hebrew or + # visual Hebrew. + # The following cases are checked: + # 1) A word longer than 1 letter, ending with a final letter. This is an + # indication that the text is laid out "naturally" since the final letter + # really appears at the end. +1 for logical score. + # 2) A word longer than 1 letter, ending with a Non-Final letter. In normal + # Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with + # the Non-Final form of that letter. Exceptions to this rule are mentioned + # above in isNonFinal(). This is an indication that the text is laid out + # backwards. +1 for visual score + # 3) A word longer than 1 letter, starting with a final letter. Final letters + # should not appear at the beginning of a word. This is an indication that + # the text is laid out backwards. +1 for visual score. + # + # The visual score and logical score are accumulated throughout the text and + # are finally checked against each other in GetCharSetName(). + # No checking for final letters in the middle of words is done since that case + # is not an indication for either Logical or Visual text. + # + # We automatically filter out all 7-bit characters (replace them with spaces) + # so the word boundary detection works properly. [MAP] + + if self.get_state() == constants.eNotMe: + # Both model probers say it's not them. No reason to continue. + return constants.eNotMe + + aBuf = self.filter_high_bit_only(aBuf) + + for cur in aBuf: + if cur == ' ': + # We stand on a space - a word just ended + if self._mBeforePrev != ' ': + # next-to-last char was not a space so self._mPrev is not a 1 letter word + if self.is_final(self._mPrev): + # case (1) [-2:not space][-1:final letter][cur:space] + self._mFinalCharLogicalScore += 1 + elif self.is_non_final(self._mPrev): + # case (2) [-2:not space][-1:Non-Final letter][cur:space] + self._mFinalCharVisualScore += 1 + else: + # Not standing on a space + if (self._mBeforePrev == ' ') and (self.is_final(self._mPrev)) and (cur != ' '): + # case (3) [-2:space][-1:final letter][cur:not space] + self._mFinalCharVisualScore += 1 + self._mBeforePrev = self._mPrev + self._mPrev = cur + + # Forever detecting, till the end or until both model probers return eNotMe (handled above) + return constants.eDetecting + + def get_charset_name(self): + # Make the decision: is it Logical or Visual? + # If the final letter score distance is dominant enough, rely on it. + finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore + if finalsub >= MIN_FINAL_CHAR_DISTANCE: + return LOGICAL_HEBREW_NAME + if finalsub <= -MIN_FINAL_CHAR_DISTANCE: + return VISUAL_HEBREW_NAME + + # It's not dominant enough, try to rely on the model scores instead. 
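+        # Illustrative comment (hypothetical numbers, not from the source):
+        # with a logical score of 7 and a visual score of 4, finalsub is 3,
+        # which is below MIN_FINAL_CHAR_DISTANCE (5), so the final-letter
+        # evidence alone is not conclusive and we fall through to comparing
+        # the two model probers' confidences below.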
+        modelsub = self._mLogicalProber.get_confidence() - self._mVisualProber.get_confidence()
+        if modelsub > MIN_MODEL_DISTANCE:
+            return LOGICAL_HEBREW_NAME
+        if modelsub < -MIN_MODEL_DISTANCE:
+            return VISUAL_HEBREW_NAME
+
+        # Still no good, back to final letter distance, maybe it'll save the day.
+        if finalsub < 0.0:
+            return VISUAL_HEBREW_NAME
+
+        # Otherwise (finalsub > 0, i.e. Logical, or no evidence either way), default to Logical.
+        return LOGICAL_HEBREW_NAME
+
+    def get_state(self):
+        # Remain active as long as any of the model probers are active.
+        if (self._mLogicalProber.get_state() == constants.eNotMe) and \
+           (self._mVisualProber.get_state() == constants.eNotMe):
+            return constants.eNotMe
+        return constants.eDetecting
diff --git a/fanficdownloader/chardet/jisfreq.py b/fanficdownloader/chardet/jisfreq.py
new file mode 100644
index 00000000..5fe4a5c3
--- /dev/null
+++ b/fanficdownloader/chardet/jisfreq.py
@@ -0,0 +1,567 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# Sampled from about 20M of text material, including literature and computer technology.
+#
+# Japanese frequency table, applied to both S-JIS and EUC-JP.
+# The entries are sorted in order of frequency.
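+#
+# (Note: the figures below appear to be cumulative coverage ratios; e.g.
+# "512 --> 0.92635" would mean that the 512 most frequent characters cover
+# roughly 92.6% of the sampled text. That value is what the "Ideal
+# Distribution Ratio" below is computed from: 0.92635 / (1 - 0.92635) = 12.58.)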
+ +# 128 --> 0.77094 +# 256 --> 0.85710 +# 512 --> 0.92635 +# 1024 --> 0.97130 +# 2048 --> 0.99431 +# +# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 +# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 +# +# Typical Distribution Ratio, 25% of IDR + +JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 + +# Char to FreqOrder table , +JIS_TABLE_SIZE = 4368 + +JISCharToFreqOrder = ( \ + 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 +3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 +1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 +2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64 +2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80 +5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96 +1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112 +5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128 +5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144 +5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160 +5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176 +5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192 +5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208 +1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224 +1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240 +1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256 +2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272 +3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288 +3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304 + 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320 + 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336 +1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352 + 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368 +5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384 + 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400 + 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416 + 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432 + 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448 + 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464 +5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480 +5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496 +5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512 +4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528 +5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544 +5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560 +5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576 +5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592 
+5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608 +5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624 +5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640 +5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656 +5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672 +3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688 +5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704 +5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720 +5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736 +5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752 +5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768 +5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784 +5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800 +5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816 +5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832 +5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848 +5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864 +5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880 +5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896 +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912 +5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928 +5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944 +5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960 +5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976 +5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992 +5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008 +5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024 +5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040 +5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056 +5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072 +5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088 +5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104 +5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120 +5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136 +5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152 +5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168 +5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184 +5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200 +5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216 +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232 
+5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248 +5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264 +5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280 +5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296 +6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312 +6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328 +6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344 +6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360 +6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376 +6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392 +6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408 +6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424 +4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440 + 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456 + 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472 +1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488 +1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504 + 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520 +3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536 +3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552 + 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568 +3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584 +3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600 + 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616 +2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632 + 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648 +3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664 +1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680 + 380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696 +1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712 + 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728 +2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744 +2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760 +2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776 +2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792 +1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808 +1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824 +1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840 +1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856 +2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 
1872 +1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888 +2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904 +1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920 +1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936 +1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952 +1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968 +1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984 +1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000 + 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016 + 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032 +1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048 +2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064 +2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080 +2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096 +3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112 +3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128 + 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144 +3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160 +1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176 + 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192 +2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208 +1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224 + 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240 +3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256 +4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272 +2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288 +1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304 +2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320 +1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336 + 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352 + 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368 +1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384 +2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400 +2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416 +2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432 +3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448 +1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464 +2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480 + 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496 + 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, 
# 2512 + 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528 +1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544 +2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560 + 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576 +1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592 +1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608 + 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624 +1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640 +1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656 +1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672 + 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688 +2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704 + 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720 +2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736 +3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752 +2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768 +1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784 +6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800 +1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816 +2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832 +1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848 + 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864 + 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880 +3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896 +3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912 +1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928 +1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944 +1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960 +1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976 + 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992 + 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008 +2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024 + 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040 +3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056 +2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072 + 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088 +1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104 +2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120 + 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136 +1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, 
# 3152 + 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168 +4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184 +2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200 +1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216 + 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232 +1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248 +2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264 + 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280 +6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296 +1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312 +1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328 +2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344 +3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360 + 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376 +3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392 +1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408 + 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424 +1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440 + 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456 +3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472 + 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488 +2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504 + 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520 +4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536 +2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552 +1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568 +1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584 +1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600 + 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616 +1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632 +3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648 +1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664 +3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680 + 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696 + 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712 + 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728 +2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744 +1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760 + 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776 +1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 
904,3618,3537, # 3792 + 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808 +1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824 + 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840 + 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856 + 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872 +1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888 +1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904 +2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920 +4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936 + 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952 +1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968 + 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984 +1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000 +3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016 +1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032 +2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048 +2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064 +1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080 +1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096 +2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112 + 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128 +2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144 +1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160 +1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176 +1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192 +1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208 +3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224 +2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240 +2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256 + 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272 +3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288 +3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304 +1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320 +2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 +1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 +2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 +#Everything below is of no interest for detection purpose +2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384 +6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400 +6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 
4416 +6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432 +6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448 +4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464 +4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480 +3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496 +3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512 +4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528 +3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544 +6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560 +4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576 +6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592 +6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608 +6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624 +6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640 +6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656 +6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672 +3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688 +3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704 +6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720 +2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736 +4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752 +4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768 +4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784 +6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800 +3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816 +4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832 +4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848 +6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864 +4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880 +6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896 +3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912 +2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928 +4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944 +2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960 +6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976 +4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992 +6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008 +6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024 +6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040 
+4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056 +6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072 +2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088 +6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104 +4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120 +6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136 +4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152 +4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168 +6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184 +6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200 +6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216 +3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232 +1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248 +3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264 +3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280 +4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296 +6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312 +3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328 +6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344 +3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360 +3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376 +2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392 +6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408 +6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424 +3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440 +6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456 +3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472 +6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488 +6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504 +6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520 +4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536 +6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552 +4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568 +3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584 +3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600 +6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616 +6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632 +4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648 +6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664 +6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 
5680 +6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696 +6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712 +6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728 +6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744 +4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760 +4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776 +3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792 +6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808 +4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824 +2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840 +6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856 +6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872 +4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888 +2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904 +4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920 +2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936 +4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952 +4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968 +4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984 +6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000 +3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016 +6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032 +3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048 +6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064 +2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080 +3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096 +7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112 +2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128 +3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144 +3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160 +3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176 +3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192 +7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208 +7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224 +7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240 +7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256 +7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272 +4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288 +3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304 
+3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320 +4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336 +3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352 +3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368 +7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384 +4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400 +7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416 +7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432 +7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448 +7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464 +7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480 +4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496 +4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512 +7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528 +3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544 +4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560 +7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576 +7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592 +4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608 +3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624 +3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640 +7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656 +4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672 +4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688 +4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704 +4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720 +4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736 +4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752 +7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768 +7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784 +7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800 +7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816 +7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832 +2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848 +3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864 +7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880 +7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896 +3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912 +4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928 +3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 
6944 +3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960 +2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976 +7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992 +7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008 +4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024 +3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040 +3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056 +7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072 +7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088 +7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104 +4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120 +7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136 +2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152 +3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168 +4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184 +7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200 +4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216 +4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232 +7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248 +7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264 +5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280 +7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296 +7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312 +7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328 +7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344 +7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360 +5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376 +5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392 +7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408 +3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424 +7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440 +7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456 +3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472 +7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488 +7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504 +1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520 +3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536 +4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552 +2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568 
+3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584 +2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600 +5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616 +4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632 +4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648 +5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664 +7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680 +7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696 +7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712 +7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728 +3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744 +7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760 +3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776 +7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792 +4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808 +7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824 +7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840 +7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856 +7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872 +7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888 +7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904 +7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920 +7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936 +7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952 +7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968 +7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984 +7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000 +8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016 +8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032 +8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048 +8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064 +8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080 +8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096 +8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112 +8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128 +8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144 +8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160 +8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176 +8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192 +8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 
8208 +8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224 +8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240 +8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256 +8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272 diff --git a/fanficdownloader/chardet/jpcntx.py b/fanficdownloader/chardet/jpcntx.py new file mode 100644 index 00000000..93db4a9c --- /dev/null +++ b/fanficdownloader/chardet/jpcntx.py @@ -0,0 +1,210 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +NUM_OF_CATEGORY = 6 +DONT_KNOW = -1 +ENOUGH_REL_THRESHOLD = 100 +MAX_REL_THRESHOLD = 1000 +MINIMUM_DATA_THRESHOLD = 4 + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( \ +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), 
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), 
+(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), 
+(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), 
+(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
+(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
+(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
+(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
+(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
+(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
+(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
+(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
+(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
+(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
+(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
+)
+
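+# (Indexing note: the table above is read as jp2CharContext[previous_order][current_order];
+# each cell holds a frequency category from 0 (rare) to 5 (very common), i.e. one of the
+# NUM_OF_CATEGORY buckets tallied in JapaneseContextAnalysis._mRelSample below.)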
+class JapaneseContextAnalysis:
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self._mTotalRel = 0  # total sequences received
+        self._mRelSample = [0] * NUM_OF_CATEGORY  # category counters; each integer counts the sequences seen in its category
+        self._mNeedToSkipCharNum = 0  # if the last byte in the current buffer does not end a character, the number of bytes to skip in the next buffer
+        self._mLastCharOrder = -1  # the order of the previous char
+        self._mDone = constants.False  # set to constants.True once detection is done and a conclusion has been made
+
+    def feed(self, aBuf, aLen):
+        if self._mDone: return
+
+        # The buffer we got is byte oriented, and a character may span more than
+        # one buffer. If the last one or two bytes of the previous buffer did not
+        # complete a character, we recorded how many bytes were still needed, and
+        # we skip those bytes here. We could instead keep those bytes and analyse
+        # the character once it is complete, but one character makes little
+        # difference, so simply skipping it simplifies the logic and improves
+        # performance.
+        i = self._mNeedToSkipCharNum
+        while i < aLen:
+            order, charLen = self.get_order(aBuf[i:i+2])
+            i += charLen
+            if i > aLen:
+                # character straddles the buffer boundary
+                self._mNeedToSkipCharNum = i - aLen
+                self._mLastCharOrder = -1
+            else:
+                if (order != -1) and (self._mLastCharOrder != -1):
+                    self._mTotalRel += 1
+                    if self._mTotalRel > MAX_REL_THRESHOLD:
+                        self._mDone = constants.True
+                        break
+                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
+                self._mLastCharOrder = order
+
+    def got_enough_data(self):
+        return self._mTotalRel > ENOUGH_REL_THRESHOLD
+
+    def get_confidence(self):
+        # This is just one way to calculate confidence. It works well for me.
+        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
+            # float() avoids Python 2 integer division, which would truncate the ratio to 0
+            return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
+        else:
+            return DONT_KNOW
+
+    def get_order(self, aStr):
+        return -1, 1
+
+class SJISContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, aStr):
+        if not aStr: return -1, 1
+        # find out the current char's byte length
+        if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \
+           ((aStr[0] >= '\xE0') and (aStr[0] <= '\xFC')):
+            charLen = 2
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana
+        if len(aStr) > 1:
+            if (aStr[0] == '\202') and \
+               (aStr[1] >= '\x9F') and \
+               (aStr[1] <= '\xF1'):
+                return ord(aStr[1]) - 0x9F, charLen
+
+        return -1, charLen
+
+class EUCJPContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, aStr):
+        if not aStr: return -1, 1
+        # find out the current char's byte length
+        if (aStr[0] == '\x8E') or \
+           ((aStr[0] >= '\xA1') and (aStr[0] <= '\xFE')):
+            charLen = 2
+        elif aStr[0] == '\x8F':
+            charLen = 3
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana
+        if len(aStr) > 1:
+            if (aStr[0] == '\xA4') and \
+               (aStr[1] >= '\xA1') and \
+               (aStr[1] <= '\xF3'):
+                return ord(aStr[1]) - 0xA1, charLen
+
+        return -1, charLen
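
A minimal usage sketch for the context analysers above (a sketch only, assuming
this jpcntx.py and its constants.py are importable; the input file name is
hypothetical):

    from jpcntx import SJISContextAnalysis

    analyser = SJISContextAnalysis()
    raw = open('sample_sjis.txt', 'rb').read()   # raw Shift_JIS bytes
    analyser.feed(raw, len(raw))                 # tally hiragana-pair frequency categories
    if analyser.got_enough_data():
        print analyser.get_confidence()          # high for natural Japanese text
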
diff --git a/fanficdownloader/chardet/langbulgarianmodel.py b/fanficdownloader/chardet/langbulgarianmodel.py
new file mode 100644
index 00000000..bf5641e7
--- /dev/null
+++ b/fanficdownloader/chardet/langbulgarianmodel.py
@@ -0,0 +1,228 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants
+
+# 255: Control characters that usually do not exist in any text
+# 254: Carriage/Return
+# 253: symbols (punctuation) that do not belong to a word
+# 252: 0 - 9
+
+# Character Mapping Table:
+# this table is modified based on win1251BulgarianCharToOrderMap, so
+# only numbers < 64 are guaranteed valid
+
+Latin5_BulgarianCharToOrderMap = ( \
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
+194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
+210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
+ 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
+ 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
+ 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
+ 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
+)
+
+win1251BulgarianCharToOrderMap = ( \
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
+206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
+221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
+ 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
+ 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
+ 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
+ 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 96.9392%
+# first 1024 sequences: 3.0618%
+# rest sequences: 0.2992%
+# negative sequences: 0.0020%
+BulgarianLangModel = ( \
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
+3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
+0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
+0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, +3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, +2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, +2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, 
+2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { \ + 'charToOrderMap': Latin5_BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-5" +} + +Win1251BulgarianModel = { \ + 'charToOrderMap': win1251BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1251" +} diff --git a/fanficdownloader/chardet/langcyrillicmodel.py b/fanficdownloader/chardet/langcyrillicmodel.py new file mode 100644 index 00000000..e604cc73 --- /dev/null +++ b/fanficdownloader/chardet/langcyrillicmodel.py @@ -0,0 +1,329 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# KOI8-R language model +# Character Mapping Table: +KOI8R_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 
70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 
68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, 
+0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { \ + 'charToOrderMap': KOI8R_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "KOI8-R" +} + +Win1251CyrillicModel = { \ + 'charToOrderMap': win1251_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1251" +} + +Latin5CyrillicModel = { \ + 'charToOrderMap': latin5_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-5" +} + +MacCyrillicModel = { \ + 'charToOrderMap': macCyrillic_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "MacCyrillic" +}; + +Ibm866Model = { \ + 'charToOrderMap': IBM866_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "IBM866" +} + +Ibm855Model = { \ + 'charToOrderMap': IBM855_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "IBM855" +} diff --git a/fanficdownloader/chardet/langgreekmodel.py b/fanficdownloader/chardet/langgreekmodel.py new file mode 100644 index 00000000..ec6d49e8 --- /dev/null +++ b/fanficdownloader/chardet/langgreekmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative 
sequences: 0.0148% +GreekLangModel = ( \ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, 
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { \ + 'charToOrderMap': Latin7_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-7" +} + +Win1253GreekModel = { \ + 'charToOrderMap': win1253_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1253" +} diff --git a/fanficdownloader/chardet/langhebrewmodel.py b/fanficdownloader/chardet/langhebrewmodel.py new file mode 100644 index 00000000..a8bcc65b --- /dev/null +++ b/fanficdownloader/chardet/langhebrewmodel.py @@ -0,0 +1,201 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Simon Montagu +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Shoshannah Forbes - original C code (?) +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
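+#
+# [Editor's note -- illustrative sketch, not upstream chardet code.]  The
+# model dicts defined by these lang*model.py files (Latin7GreekModel,
+# Win1253GreekModel, and the Hebrew model below) are consumed by the
+# single-byte prober: 'charToOrderMap' maps each byte to a frequency rank,
+# and consecutive ranks below 64 index the flattened 64x64
+# 'precedenceMatrix' of bigram ratings (0-3, where 3 means "frequent
+# pair").  Roughly:
+#
+#     SAMPLE_SIZE = 64
+#
+#     def positive_ratio(model, byte_str):
+#         """Fraction of adjacent rank pairs rated 3 by the matrix."""
+#         ranks = [model['charToOrderMap'][ord(c)] for c in byte_str]
+#         ranks = [r for r in ranks if r < SAMPLE_SIZE]
+#         positive = total = 0
+#         for first, second in zip(ranks, ranks[1:]):
+#             total += 1
+#             if model['precedenceMatrix'][first * SAMPLE_SIZE + second] == 3:
+#                 positive += 1
+#         return total and positive * 1.0 / total or 0.0
+#
+# A ratio close to the model's 'mTypicalPositiveRatio' means the byte
+# stream behaves like real text in that encoding.
+#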
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +win1255_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, + 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest sequences: 0.087% +# negative sequences: 0.0015% +HebrewLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, 
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 
+3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { \ + 'charToOrderMap': win1255_CharToOrderMap, + 'precedenceMatrix': HebrewLangModel, + 'mTypicalPositiveRatio': 0.984004, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1255" +} diff --git 
a/fanficdownloader/chardet/langhungarianmodel.py b/fanficdownloader/chardet/langhungarianmodel.py new file mode 100644 index 00000000..d635f03c --- /dev/null +++ b/fanficdownloader/chardet/langhungarianmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest sequences: 0.8894% +# negative sequences: 0.0009% +HungarianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, 
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, 
+2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { \ + 'charToOrderMap': Latin2_HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': constants.True, + 'charsetName': "ISO-8859-2" +} + +Win1250HungarianModel = { \ + 'charToOrderMap': win1250HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': constants.True, + 'charsetName': "windows-1250" +} diff --git a/fanficdownloader/chardet/langthaimodel.py b/fanficdownloader/chardet/langthaimodel.py new file mode 100644 index 00000000..96ec054f --- /dev/null +++ b/fanficdownloader/chardet/langthaimodel.py @@ -0,0 +1,200 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). + +# Character Mapping Table: +TIS620CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( \ +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, 
+3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, 
+0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { \ + 'charToOrderMap': TIS620CharToOrderMap, + 'precedenceMatrix': ThaiLangModel, + 'mTypicalPositiveRatio': 0.926386, + 'keepEnglishLetter': constants.False, + 
'charsetName': "TIS-620" +} diff --git a/fanficdownloader/chardet/latin1prober.py b/fanficdownloader/chardet/latin1prober.py new file mode 100644 index 00000000..b46129ba --- /dev/null +++ b/fanficdownloader/chardet/latin1prober.py @@ -0,0 +1,136 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetprober import CharSetProber +import constants +import operator + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +CLASS_NUM = 8 # total classes + +Latin1_CharToClass = ( \ + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 + OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F + UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 + OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF + ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF + ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 + ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF + ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 + ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF +) + +# 0 : 
illegal
+# 1 : very unlikely
+# 2 : normal
+# 3 : very likely
+Latin1ClassModel = ( \
+# UDF OTH ASC ASS ACV ACO ASV ASO
+ 0, 0, 0, 0, 0, 0, 0, 0, # UDF
+ 0, 3, 3, 3, 3, 3, 3, 3, # OTH
+ 0, 3, 3, 3, 3, 3, 3, 3, # ASC
+ 0, 3, 3, 3, 1, 1, 3, 3, # ASS
+ 0, 3, 3, 3, 1, 2, 1, 2, # ACV
+ 0, 3, 3, 3, 3, 3, 3, 3, # ACO
+ 0, 3, 1, 3, 1, 1, 1, 3, # ASV
+ 0, 3, 1, 3, 1, 1, 3, 3, # ASO
+)
+
+class Latin1Prober(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self.reset()
+
+    def reset(self):
+        self._mLastCharClass = OTH
+        self._mFreqCounter = [0] * FREQ_CAT_NUM
+        CharSetProber.reset(self)
+
+    def get_charset_name(self):
+        return "windows-1252"
+
+    def feed(self, aBuf):
+        aBuf = self.filter_with_english_letters(aBuf)
+        for c in aBuf:
+            charClass = Latin1_CharToClass[ord(c)]
+            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]
+            if freq == 0:
+                self._mState = constants.eNotMe
+                break
+            self._mFreqCounter[freq] += 1
+            self._mLastCharClass = charClass
+
+        return self.get_state()
+
+    def get_confidence(self):
+        if self.get_state() == constants.eNotMe:
+            return 0.01
+
+        total = reduce(operator.add, self._mFreqCounter)
+        if total < 0.01:
+            confidence = 0.0
+        else:
+            # force float division here; int / int truncates to 0 under
+            # Python 2 and would zero out the positive term.
+            confidence = (self._mFreqCounter[3] * 1.0 / total) - (self._mFreqCounter[1] * 20.0 / total)
+        if confidence < 0.0:
+            confidence = 0.0
+        # lower the confidence of latin1 so that other, more accurate
+        # detectors can take priority.
+        confidence = confidence * 0.5
+        return confidence
diff --git a/fanficdownloader/chardet/mbcharsetprober.py b/fanficdownloader/chardet/mbcharsetprober.py
new file mode 100644
index 00000000..a8131445
--- /dev/null
+++ b/fanficdownloader/chardet/mbcharsetprober.py
@@ -0,0 +1,82 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+# Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
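+#
+# [Editor's note -- hypothetical usage sketch, not part of chardet.]  Every
+# prober in this package follows the same small protocol: feed() it raw
+# bytes as often as you like, check get_state(), then read
+# get_confidence().  For example, driving the Latin1Prober defined in
+# latin1prober.py:
+#
+#     import constants
+#     from latin1prober import Latin1Prober
+#
+#     prober = Latin1Prober()
+#     prober.feed(open('page.html', 'rb').read())
+#     if prober.get_state() != constants.eNotMe:
+#         print prober.get_charset_name(), prober.get_confidence()
+#
+# ('page.html' is a placeholder; any undecoded byte string will do.)
+#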
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from constants import eStart, eError, eItsMe +from charsetprober import CharSetProber + +class MultiByteCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mDistributionAnalyzer = None + self._mCodingSM = None + self._mLastChar = ['\x00', '\x00'] + + def reset(self): + CharSetProber.reset(self) + if self._mCodingSM: + self._mCodingSM.reset() + if self._mDistributionAnalyzer: + self._mDistributionAnalyzer.reset() + self._mLastChar = ['\x00', '\x00'] + + def get_charset_name(self): + pass + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n') + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if self._mDistributionAnalyzer.got_enough_data() and \ + (self.get_confidence() > constants.SHORTCUT_THRESHOLD): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + return self._mDistributionAnalyzer.get_confidence() diff --git a/fanficdownloader/chardet/mbcsgroupprober.py b/fanficdownloader/chardet/mbcsgroupprober.py new file mode 100644 index 00000000..941cc3e3 --- /dev/null +++ b/fanficdownloader/chardet/mbcsgroupprober.py @@ -0,0 +1,50 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
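+#
+# [Editor's note -- sketch only; the concrete probers elsewhere in this
+# patch are the authoritative versions.]  MultiByteCharSetProber above is
+# abstract: subclasses supply a coding state machine (a byte-sequence
+# validator) and a distribution analyzer (a character-frequency scorer).
+# Wiring one up looks roughly like this, assuming the CodingStateMachine
+# and SJISDistributionAnalysis classes provided by this package:
+#
+#     from codingstatemachine import CodingStateMachine
+#     from chardistribution import SJISDistributionAnalysis
+#     from mbcssm import SJISSMModel
+#
+#     class SJISProberSketch(MultiByteCharSetProber):
+#         def __init__(self):
+#             MultiByteCharSetProber.__init__(self)
+#             self._mCodingSM = CodingStateMachine(SJISSMModel)
+#             self._mDistributionAnalyzer = SJISDistributionAnalysis()
+#             self.reset()
+#
+#         def get_charset_name(self):
+#             return "Shift_JIS"
+#
+# (The real SJISProber in this patch also layers a context analyzer on
+# top of this.)
+#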
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetgroupprober import CharSetGroupProber +from utf8prober import UTF8Prober +from sjisprober import SJISProber +from eucjpprober import EUCJPProber +from gb2312prober import GB2312Prober +from euckrprober import EUCKRProber +from big5prober import Big5Prober +from euctwprober import EUCTWProber + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ \ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + Big5Prober(), + EUCTWProber()] + self.reset() diff --git a/fanficdownloader/chardet/mbcssm.py b/fanficdownloader/chardet/mbcssm.py new file mode 100644 index 00000000..e46c1ffe --- /dev/null +++ b/fanficdownloader/chardet/mbcssm.py @@ -0,0 +1,514 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
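+#
+# [Editor's note -- usage sketch, not upstream code.]  MBCSGroupProber
+# above simply inherits the fan-out logic of CharSetGroupProber (defined
+# in charsetgroupprober.py in this patch): feed() forwards the data to
+# every child prober, and get_charset_name()/get_confidence() report the
+# child with the best score.  So multi-byte detection is one call away:
+#
+#     from mbcsgroupprober import MBCSGroupProber
+#
+#     group = MBCSGroupProber()
+#     group.feed(raw_bytes)  # raw_bytes: any str of undecoded input
+#     print group.get_charset_name(), group.get_confidence()
+#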
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from constants import eStart, eError, eItsMe + +# BIG5 + +BIG5_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 4,4,4,4,4,4,4,4, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 4,3,3,3,3,3,3,3, # a0 - a7 + 3,3,3,3,3,3,3,3, # a8 - af + 3,3,3,3,3,3,3,3, # b0 - b7 + 3,3,3,3,3,3,3,3, # b8 - bf + 3,3,3,3,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0) # f8 - ff + +BIG5_st = ( \ + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart)#10-17 + +Big5CharLenTable = (0, 1, 1, 2, 0) + +Big5SMModel = {'classTable': BIG5_cls, + 'classFactor': 5, + 'stateTable': BIG5_st, + 'charLenTable': Big5CharLenTable, + 'name': 'Big5'} + +# EUC-JP + +EUCJP_cls = ( \ + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,5,5, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,5,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,4,4,4,4,4,4,4, # 30 - 37 + 4,4,4,4,4,4,4,4, # 38 - 3f + 4,4,4,4,4,4,4,4, # 40 - 47 + 4,4,4,4,4,4,4,4, # 48 - 4f + 4,4,4,4,4,4,4,4, # 50 - 57 + 4,4,4,4,4,4,4,4, # 58 - 5f + 4,4,4,4,4,4,4,4, # 60 - 67 + 4,4,4,4,4,4,4,4, # 68 - 6f + 4,4,4,4,4,4,4,4, # 70 - 77 + 4,4,4,4,4,4,4,4, # 78 - 7f + 5,5,5,5,5,5,5,5, # 80 - 87 + 5,5,5,5,5,5,1,3, # 88 - 8f + 5,5,5,5,5,5,5,5, # 90 - 97 + 5,5,5,5,5,5,5,5, # 98 - 9f + 5,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,0,5) # f8 - ff + +EUCJP_st = ( \ + 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 + eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f + 3,eError,eError,eError,eStart,eStart,eStart,eStart)#20-27 + +EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) + +EUCJPSMModel = {'classTable': EUCJP_cls, + 'classFactor': 6, + 'stateTable': EUCJP_st, + 'charLenTable': EUCJPCharLenTable, + 'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f 
+ 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,3,3,3, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,3,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 2,2,2,2,2,2,2,2, # e0 - e7 + 2,2,2,2,2,2,2,2, # e8 - ef + 2,2,2,2,2,2,2,2, # f0 - f7 + 2,2,2,2,2,2,2,0) # f8 - ff + +EUCKR_st = ( + eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 + eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart)#08-0f + +EUCKRCharLenTable = (0, 1, 2, 0) + +EUCKRSMModel = {'classTable': EUCKR_cls, + 'classFactor': 4, + 'stateTable': EUCKR_st, + 'charLenTable': EUCKRCharLenTable, + 'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_cls = ( \ + 2,2,2,2,2,2,2,2, # 00 - 07 + 2,2,2,2,2,2,0,0, # 08 - 0f + 2,2,2,2,2,2,2,2, # 10 - 17 + 2,2,2,0,2,2,2,2, # 18 - 1f + 2,2,2,2,2,2,2,2, # 20 - 27 + 2,2,2,2,2,2,2,2, # 28 - 2f + 2,2,2,2,2,2,2,2, # 30 - 37 + 2,2,2,2,2,2,2,2, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,2, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,6,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,3,4,4,4,4,4,4, # a0 - a7 + 5,5,1,1,1,1,1,1, # a8 - af + 1,1,1,1,1,1,1,1, # b0 - b7 + 1,1,1,1,1,1,1,1, # b8 - bf + 1,1,3,1,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0) # f8 - ff + +EUCTW_st = ( \ + eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 + eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f + 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 + eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f + +EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) + +EUCTWSMModel = {'classTable': EUCTW_cls, + 'classFactor': 7, + 'stateTable': EUCTW_st, + 'charLenTable': EUCTWCharLenTable, + 'name': 'x-euc-tw'} + +# GB2312 + +GB2312_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 3,3,3,3,3,3,3,3, # 30 - 37 + 3,3,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,4, # 78 - 7f + 5,6,6,6,6,6,6,6, # 80 - 87 + 6,6,6,6,6,6,6,6, # 88 - 8f + 6,6,6,6,6,6,6,6, # 90 - 97 + 6,6,6,6,6,6,6,6, # 98 - 9f + 6,6,6,6,6,6,6,6, # a0 - a7 + 6,6,6,6,6,6,6,6, # a8 - af + 6,6,6,6,6,6,6,6, # b0 - b7 + 6,6,6,6,6,6,6,6, # b8 - bf + 6,6,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 6,6,6,6,6,6,6,6, # e0 - e7 + 6,6,6,6,6,6,6,6, # e8 - ef + 6,6,6,6,6,6,6,6, # f0 - f7 + 6,6,6,6,6,6,6,0) # f8 - ff + +GB2312_st = ( \ + eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + 
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
+ 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
+ eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
+ eError,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f
+
+# To be accurate, the length of class 6 can be either 2 or 4.
+# But it is not necessary to discriminate between the two since
+# it is used for frequency analysis only, and we are validating
+# each code range there as well. So it is safe to set it to be
+# 2 here.
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
+
+GB2312SMModel = {'classTable': GB2312_cls,
+ 'classFactor': 7,
+ 'stateTable': GB2312_st,
+ 'charLenTable': GB2312CharLenTable,
+ 'name': 'GB2312'}
+
+# Shift_JIS
+
+SJIS_cls = ( \
+ 1,1,1,1,1,1,1,1, # 00 - 07
+ 1,1,1,1,1,1,0,0, # 08 - 0f
+ 1,1,1,1,1,1,1,1, # 10 - 17
+ 1,1,1,0,1,1,1,1, # 18 - 1f
+ 1,1,1,1,1,1,1,1, # 20 - 27
+ 1,1,1,1,1,1,1,1, # 28 - 2f
+ 1,1,1,1,1,1,1,1, # 30 - 37
+ 1,1,1,1,1,1,1,1, # 38 - 3f
+ 2,2,2,2,2,2,2,2, # 40 - 47
+ 2,2,2,2,2,2,2,2, # 48 - 4f
+ 2,2,2,2,2,2,2,2, # 50 - 57
+ 2,2,2,2,2,2,2,2, # 58 - 5f
+ 2,2,2,2,2,2,2,2, # 60 - 67
+ 2,2,2,2,2,2,2,2, # 68 - 6f
+ 2,2,2,2,2,2,2,2, # 70 - 77
+ 2,2,2,2,2,2,2,1, # 78 - 7f
+ 3,3,3,3,3,3,3,3, # 80 - 87
+ 3,3,3,3,3,3,3,3, # 88 - 8f
+ 3,3,3,3,3,3,3,3, # 90 - 97
+ 3,3,3,3,3,3,3,3, # 98 - 9f
+ #0xa0 is illegal in sjis encoding, but some pages do
+ #contain such a byte. We need to be more error-forgiving.
+ 2,2,2,2,2,2,2,2, # a0 - a7
+ 2,2,2,2,2,2,2,2, # a8 - af
+ 2,2,2,2,2,2,2,2, # b0 - b7
+ 2,2,2,2,2,2,2,2, # b8 - bf
+ 2,2,2,2,2,2,2,2, # c0 - c7
+ 2,2,2,2,2,2,2,2, # c8 - cf
+ 2,2,2,2,2,2,2,2, # d0 - d7
+ 2,2,2,2,2,2,2,2, # d8 - df
+ 3,3,3,3,3,3,3,3, # e0 - e7
+ 3,3,3,3,3,4,4,4, # e8 - ef
+ 4,4,4,4,4,4,4,4, # f0 - f7
+ 4,4,4,4,4,0,0,0) # f8 - ff
+
+SJIS_st = ( \
+ eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart)#10-17
+
+SJISCharLenTable = (0, 1, 1, 2, 0, 0)
+
+SJISSMModel = {'classTable': SJIS_cls,
+ 'classFactor': 6,
+ 'stateTable': SJIS_st,
+ 'charLenTable': SJISCharLenTable,
+ 'name': 'Shift_JIS'}
+
+# UCS2-BE
+
+UCS2BE_cls = ( \
+ 0,0,0,0,0,0,0,0, # 00 - 07
+ 0,0,1,0,0,2,0,0, # 08 - 0f
+ 0,0,0,0,0,0,0,0, # 10 - 17
+ 0,0,0,3,0,0,0,0, # 18 - 1f
+ 0,0,0,0,0,0,0,0, # 20 - 27
+ 0,3,3,3,3,3,0,0, # 28 - 2f
+ 0,0,0,0,0,0,0,0, # 30 - 37
+ 0,0,0,0,0,0,0,0, # 38 - 3f
+ 0,0,0,0,0,0,0,0, # 40 - 47
+ 0,0,0,0,0,0,0,0, # 48 - 4f
+ 0,0,0,0,0,0,0,0, # 50 - 57
+ 0,0,0,0,0,0,0,0, # 58 - 5f
+ 0,0,0,0,0,0,0,0, # 60 - 67
+ 0,0,0,0,0,0,0,0, # 68 - 6f
+ 0,0,0,0,0,0,0,0, # 70 - 77
+ 0,0,0,0,0,0,0,0, # 78 - 7f
+ 0,0,0,0,0,0,0,0, # 80 - 87
+ 0,0,0,0,0,0,0,0, # 88 - 8f
+ 0,0,0,0,0,0,0,0, # 90 - 97
+ 0,0,0,0,0,0,0,0, # 98 - 9f
+ 0,0,0,0,0,0,0,0, # a0 - a7
+ 0,0,0,0,0,0,0,0, # a8 - af
+ 0,0,0,0,0,0,0,0, # b0 - b7
+ 0,0,0,0,0,0,0,0, # b8 - bf
+ 0,0,0,0,0,0,0,0, # c0 - c7
+ 0,0,0,0,0,0,0,0, # c8 - cf
+ 0,0,0,0,0,0,0,0, # d0 - d7
+ 0,0,0,0,0,0,0,0, # d8 - df
+ 0,0,0,0,0,0,0,0, # e0 - e7
+ 0,0,0,0,0,0,0,0, # e8 - ef
+ 0,0,0,0,0,0,0,0, # f0 - f7
+ 0,0,0,0,0,0,4,5) # f8 - ff
+
+UCS2BE_st = ( \
+ 5, 7, 7,eError, 4, 3,eError,eError,#00-07
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
+ 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
+ 6, 6, 6, 6, 5, 7, 7,eError,#20-27
+ 5, 8, 6, 6,eError, 6, 6, 6,#28-2f
+ 6, 6, 6, 6,eError,eError,eStart,eStart)#30-37
+
+UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
+
+UCS2BESMModel = {'classTable':
UCS2BE_cls, + 'classFactor': 6, + 'stateTable': UCS2BE_st, + 'charLenTable': UCS2BECharLenTable, + 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_cls = ( \ + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5) # f8 - ff + +UCS2LE_st = ( \ + 6, 6, 7, 6, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 + 5, 5, 5,eError, 5,eError, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,eError,#20-27 + 5, 5, 5,eError,eError,eError, 5, 5,#28-2f + 5, 5, 5,eError, 5,eError,eStart,eStart)#30-37 + +UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) + +UCS2LESMModel = {'classTable': UCS2LE_cls, + 'classFactor': 6, + 'stateTable': UCS2LE_st, + 'charLenTable': UCS2LECharLenTable, + 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 2,2,2,2,3,3,3,3, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 5,5,5,5,5,5,5,5, # a0 - a7 + 5,5,5,5,5,5,5,5, # a8 - af + 5,5,5,5,5,5,5,5, # b0 - b7 + 5,5,5,5,5,5,5,5, # b8 - bf + 0,0,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 7,8,8,8,8,8,8,8, # e0 - e7 + 8,8,8,8,8,9,8,8, # e8 - ef + 10,11,11,11,11,11,11,11, # f0 - f7 + 12,13,13,13,14,15,0,0) # f8 - ff + +UTF8_st = ( \ + eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 + 9, 11, 8, 7, 6, 5, 4, 3,#08-0f + eError,eError,eError,eError,eError,eError,eError,eError,#10-17 + eError,eError,eError,eError,eError,eError,eError,eError,#18-1f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f + eError,eError, 5, 5, 5, 5,eError,eError,#30-37 + eError,eError,eError,eError,eError,eError,eError,eError,#38-3f + eError,eError,eError, 5, 5, 5,eError,eError,#40-47 + eError,eError,eError,eError,eError,eError,eError,eError,#48-4f + eError,eError, 7, 7, 7, 7,eError,eError,#50-57 + eError,eError,eError,eError,eError,eError,eError,eError,#58-5f + eError,eError,eError,eError, 7, 7,eError,eError,#60-67 + eError,eError,eError,eError,eError,eError,eError,eError,#68-6f + eError,eError, 9, 9, 9, 9,eError,eError,#70-77 + 
eError,eError,eError,eError,eError,eError,eError,eError,#78-7f + eError,eError,eError,eError,eError, 9,eError,eError,#80-87 + eError,eError,eError,eError,eError,eError,eError,eError,#88-8f + eError,eError, 12, 12, 12, 12,eError,eError,#90-97 + eError,eError,eError,eError,eError,eError,eError,eError,#98-9f + eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 + eError,eError,eError,eError,eError,eError,eError,eError,#a8-af + eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 + eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf + eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 + eError,eError,eError,eError,eError,eError,eError,eError)#c8-cf + +UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8SMModel = {'classTable': UTF8_cls, + 'classFactor': 16, + 'stateTable': UTF8_st, + 'charLenTable': UTF8CharLenTable, + 'name': 'UTF-8'} diff --git a/fanficdownloader/chardet/sbcharsetprober.py b/fanficdownloader/chardet/sbcharsetprober.py new file mode 100644 index 00000000..da071163 --- /dev/null +++ b/fanficdownloader/chardet/sbcharsetprober.py @@ -0,0 +1,106 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from charsetprober import CharSetProber
+
+SAMPLE_SIZE = 64
+SB_ENOUGH_REL_THRESHOLD = 1024
+POSITIVE_SHORTCUT_THRESHOLD = 0.95
+NEGATIVE_SHORTCUT_THRESHOLD = 0.05
+SYMBOL_CAT_ORDER = 250
+NUMBER_OF_SEQ_CAT = 4
+POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
+#NEGATIVE_CAT = 0
+
+class SingleByteCharSetProber(CharSetProber):
+    def __init__(self, model, reversed=constants.False, nameProber=None):
+        CharSetProber.__init__(self)
+        self._mModel = model
+        self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup
+        self._mNameProber = nameProber # Optional auxiliary prober for name decision
+        self.reset()
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mLastOrder = 255 # char order of last character
+        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
+        self._mTotalSeqs = 0
+        self._mTotalChar = 0
+        self._mFreqChar = 0 # characters that fall in our sampling range
+
+    def get_charset_name(self):
+        if self._mNameProber:
+            return self._mNameProber.get_charset_name()
+        else:
+            return self._mModel['charsetName']
+
+    def feed(self, aBuf):
+        if not self._mModel['keepEnglishLetter']:
+            aBuf = self.filter_without_english_letters(aBuf)
+        aLen = len(aBuf)
+        if not aLen:
+            return self.get_state()
+        for c in aBuf:
+            order = self._mModel['charToOrderMap'][ord(c)]
+            if order < SYMBOL_CAT_ORDER:
+                self._mTotalChar += 1
+            if order < SAMPLE_SIZE:
+                self._mFreqChar += 1
+                if self._mLastOrder < SAMPLE_SIZE:
+                    self._mTotalSeqs += 1
+                    if not self._mReversed:
+                        self._mSeqCounters[self._mModel['precedenceMatrix'][(self._mLastOrder * SAMPLE_SIZE) + order]] += 1
+                    else: # reverse the order of the letters in the lookup
+                        self._mSeqCounters[self._mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + self._mLastOrder]] += 1
+            self._mLastOrder = order
+
+        if self.get_state() == constants.eDetecting:
+            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
+                cf = self.get_confidence()
+                if cf > POSITIVE_SHORTCUT_THRESHOLD:
+                    if constants._debug:
+                        sys.stderr.write('%s confidence = %s, we have a winner\n' % (self._mModel['charsetName'], cf))
+                    self._mState = constants.eFoundIt
+                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
+                    if constants._debug:
+                        sys.stderr.write('%s confidence = %s, below negative shortcut threshold %s\n' % (self._mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD))
+                    self._mState = constants.eNotMe
+
+        return self.get_state()
+
+    def get_confidence(self):
+        r = 0.01
+        if self._mTotalSeqs > 0:
+#            print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
+            r = (1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs / self._mModel['mTypicalPositiveRatio']
+#            print r, self._mFreqChar, self._mTotalChar
+            r = r * self._mFreqChar / self._mTotalChar
+            if r >= 1.0:
+                r = 0.99
+        return r
diff --git a/fanficdownloader/chardet/sbcsgroupprober.py b/fanficdownloader/chardet/sbcsgroupprober.py
new file mode 100644
index 00000000..d19160c8
--- /dev/null
+++ b/fanficdownloader/chardet/sbcsgroupprober.py
@@ -0,0 +1,64 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from charsetgroupprober import CharSetGroupProber +from sbcharsetprober import SingleByteCharSetProber +from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model +from langgreekmodel import Latin7GreekModel, Win1253GreekModel +from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from langthaimodel import TIS620ThaiModel +from langhebrewmodel import Win1255HebrewModel +from hebrewprober import HebrewProber + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ \ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + SingleByteCharSetProber(Latin2HungarianModel), + SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + ] + hebrewProber = HebrewProber() + logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber) + visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber) + hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) + self._mProbers.extend([hebrewProber, logicalHebrewProber, visualHebrewProber]) + + self.reset() diff --git a/fanficdownloader/chardet/sjisprober.py b/fanficdownloader/chardet/sjisprober.py new file mode 100644 index 00000000..fea2690c --- /dev/null +++ b/fanficdownloader/chardet/sjisprober.py @@ -0,0 +1,85 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import SJISDistributionAnalysis +from jpcntx import SJISContextAnalysis +from mbcssm import SJISSMModel +import constants, sys +from constants import eStart, eError, eItsMe + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(SJISSMModel) + self._mDistributionAnalyzer = SJISDistributionAnalysis() + self._mContextAnalyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "SHIFT_JIS" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n') + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar[2 - charLen :], charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if self._mContextAnalyzer.got_enough_data() and \ + (self.get_confidence() > constants.SHORTCUT_THRESHOLD): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/fanficdownloader/chardet/test.py b/fanficdownloader/chardet/test.py new file mode 100644 index 00000000..2ebf3a4d --- /dev/null +++ b/fanficdownloader/chardet/test.py @@ -0,0 +1,20 @@ +import sys, glob +sys.path.insert(0, '..') +from chardet.universaldetector import UniversalDetector + +count = 0 +u = UniversalDetector() +for f in glob.glob(sys.argv[1]): + print f.ljust(60), + u.reset() + for line in file(f, 'rb'): + u.feed(line) + if u.done: break + u.close() + result = u.result + if result['encoding']: + print result['encoding'], 'with confidence', result['confidence'] + else: + print '******** no result' + count += 1 +print count, 'tests' diff --git a/fanficdownloader/chardet/universaldetector.py 
b/fanficdownloader/chardet/universaldetector.py
new file mode 100644
index 00000000..809df227
--- /dev/null
+++ b/fanficdownloader/chardet/universaldetector.py
@@ -0,0 +1,154 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from latin1prober import Latin1Prober # windows-1252
+from mbcsgroupprober import MBCSGroupProber # multi-byte character sets
+from sbcsgroupprober import SBCSGroupProber # single-byte character sets
+from escprober import EscCharSetProber # ISO-2022, etc.
+import re
+
+MINIMUM_THRESHOLD = 0.20
+ePureAscii = 0
+eEscAscii = 1
+eHighbyte = 2
+
+class UniversalDetector:
+    def __init__(self):
+        self._highBitDetector = re.compile(r'[\x80-\xFF]')
+        self._escDetector = re.compile(r'(\033|~{)')
+        self._mEscCharSetProber = None
+        self._mCharSetProbers = []
+        self.reset()
+
+    def reset(self):
+        self.result = {'encoding': None, 'confidence': 0.0}
+        self.done = constants.False
+        self._mStart = constants.True
+        self._mGotData = constants.False
+        self._mInputState = ePureAscii
+        self._mLastChar = ''
+        if self._mEscCharSetProber:
+            self._mEscCharSetProber.reset()
+        for prober in self._mCharSetProbers:
+            prober.reset()
+
+    def feed(self, aBuf):
+        if self.done: return
+
+        aLen = len(aBuf)
+        if not aLen: return
+
+        if not self._mGotData:
+            # If the data starts with BOM, we know it is UTF
+            if aBuf[:3] == '\xEF\xBB\xBF':
+                # EF BB BF  UTF-8 with BOM
+                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
+            elif aBuf[:4] == '\xFF\xFE\x00\x00':
+                # FF FE 00 00  UTF-32, little-endian BOM
+                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
+            elif aBuf[:4] == '\x00\x00\xFE\xFF':
+                # 00 00 FE FF  UTF-32, big-endian BOM
+                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
+            elif aBuf[:4] == '\xFE\xFF\x00\x00':
+                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
+                self.result = {'encoding': "X-ISO-10646-UCS-4-3412", 'confidence': 1.0}
+            elif aBuf[:4] == '\x00\x00\xFF\xFE':
+                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
+                self.result = {'encoding': "X-ISO-10646-UCS-4-2143", 'confidence': 1.0}
+            elif aBuf[:2] == '\xFF\xFE':
+                # FF FE  UTF-16, little endian BOM
+                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
+            elif aBuf[:2] == '\xFE\xFF':
+                # FE FF  UTF-16, big endian BOM
+                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
+
+            self._mGotData = constants.True
+            if self.result['encoding'] and (self.result['confidence'] > 0.0):
+                self.done = constants.True
+                return
+
+        if self._mInputState == ePureAscii:
+            if self._highBitDetector.search(aBuf):
+                self._mInputState = eHighbyte
+            elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf):
+                self._mInputState = eEscAscii
+
+        self._mLastChar = aBuf[-1]
+
+        if self._mInputState == eEscAscii:
+            if not self._mEscCharSetProber:
+                self._mEscCharSetProber = EscCharSetProber()
+            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
+                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
+                               'confidence': self._mEscCharSetProber.get_confidence()}
+                self.done = constants.True
+        elif self._mInputState == eHighbyte:
+            if not self._mCharSetProbers:
+                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), Latin1Prober()]
+            for prober in self._mCharSetProbers:
+                if prober.feed(aBuf) == constants.eFoundIt:
+                    self.result = {'encoding': prober.get_charset_name(),
+                                   'confidence': prober.get_confidence()}
+                    self.done = constants.True
+                    break
+
+    def close(self):
+        if self.done: return
+        if not self._mGotData:
+            if constants._debug:
+                sys.stderr.write('no data received!\n')
+            return
+        self.done = constants.True
+
+        if self._mInputState == ePureAscii:
+            self.result = {'encoding': 'ascii', 'confidence': 1.0}
+            return self.result
+
+        if self._mInputState == eHighbyte:
+            proberConfidence = None
+            maxProberConfidence = 0.0
+            maxProber = None
+            for prober in self._mCharSetProbers:
+                if not prober: continue
+                proberConfidence = prober.get_confidence()
+                if proberConfidence > maxProberConfidence:
+                    maxProberConfidence = proberConfidence
+                    maxProber = prober
+            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
+                self.result = {'encoding': maxProber.get_charset_name(),
+                               'confidence': maxProber.get_confidence()}
+                return self.result
+
+        if constants._debug:
+            sys.stderr.write('no probers hit minimum threshold\n')
+            for prober in self._mCharSetProbers[0]._mProbers:
+                if not prober: continue
+                sys.stderr.write('%s confidence = %s\n' % \
+                                 (prober.get_charset_name(), \
+                                  prober.get_confidence()))
diff --git a/fanficdownloader/chardet/utf8prober.py b/fanficdownloader/chardet/utf8prober.py
new file mode 100644
index 00000000..c1792bb3
--- /dev/null
+++ b/fanficdownloader/chardet/utf8prober.py
@@ -0,0 +1,76 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from constants import eStart, eError, eItsMe
+from charsetprober import CharSetProber
+from codingstatemachine import CodingStateMachine
+from mbcssm import UTF8SMModel
+
+ONE_CHAR_PROB = 0.5
+
+class UTF8Prober(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(UTF8SMModel)
+        self.reset()
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mCodingSM.reset()
+        self._mNumOfMBChar = 0
+
+    def get_charset_name(self):
+        return "utf-8"
+
+    def feed(self, aBuf):
+        for c in aBuf:
+            codingState = self._mCodingSM.next_state(c)
+            if codingState == eError:
+                self._mState = constants.eNotMe
+                break
+            elif codingState == eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == eStart:
+                if self._mCodingSM.get_current_charlen() >= 2:
+                    self._mNumOfMBChar += 1
+
+        if self.get_state() == constants.eDetecting:
+            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        unlike = 0.99
+        if self._mNumOfMBChar < 6:
+            for i in range(0, self._mNumOfMBChar):
+                unlike = unlike * ONE_CHAR_PROB
+            return 1.0 - unlike
+        else:
+            return unlike
diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py
new file mode 100644
index 00000000..bc27a82f
--- /dev/null
+++ b/fanficdownloader/configurable.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import ConfigParser
+
+# All of the writers(epub,html,txt) and adapters(ffnet,twlt,etc)
+# inherit from Configurable.  The config file(s) use ini format:
+# [sections] with key:value settings.
+#
+# There's a [defaults] section which is overridden by the writer's
+# section [epub], which is overridden by the adapter's section for each
+# site.
+#
+# [defaults]
+# titlepage_entries: category,genre, status
+# [epub]
+# titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
+# [www.whofic.com]
+# titlepage_entries: category,genre, status,dateUpdated,rating
+
+class Configurable(object):
+
+    def __init__(self, config):
+        self.config = config
+        self.sectionslist = ['defaults']
+
+    def addConfigSection(self,section):
+        self.sectionslist.insert(0,section)
+
+    def getConfig(self, key):
+        val = ""
+        for section in self.sectionslist:
+            try:
+                val = self.config.get(section,key)
+                if val and val.lower() == "false":
+                    val = False
+                #print "getConfig(%s)=[%s]%s" % (key,section,val)
+                return val
+            except (ConfigParser.NoOptionError, ConfigParser.NoSectionError), e:
+                pass
+
+        return val
+
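+    # A rough usage sketch -- the section and key names below are the
+    # hypothetical examples from the comment block above, not values
+    # shipped with this module:
+    #
+    #   config = ConfigParser.SafeConfigParser()
+    #   config.read('defaults.ini')
+    #   c = Configurable(config)
+    #   c.addConfigSection('epub')            # consulted before [defaults]
+    #   c.addConfigSection('www.whofic.com')  # consulted before [epub]
+    #   c.getConfigList('titlepage_entries')
+    #   # -> ['category', 'genre', 'status', 'dateUpdated', 'rating']
+
+    # split and strip each.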
+ def getConfigList(self, key): + vlist = self.getConfig(key).split(',') + vlist = [ v.strip() for v in vlist ] + #print "vlist("+key+"):"+str(vlist) + return vlist + diff --git a/fanficdownloader/exceptions.py b/fanficdownloader/exceptions.py new file mode 100644 index 00000000..cf8e558e --- /dev/null +++ b/fanficdownloader/exceptions.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## A few exceptions for different things for adapters + +class FailedToDownload(Exception): + def __init__(self,error): + self.error=error + + def __str__(self): + return self.error + +class InvalidStoryURL(Exception): + def __init__(self,url,domain,example): + self.url=url + self.domain=domain + self.example=example + + def __str__(self): + return "Bad Story URL: (%s) for site: (%s) Example: (%s)" % (self.url, self.domain, self.example) + +class FailedToLogin(Exception): + def __init__(self,url,username): + self.url=url + self.username=username + + def __str__(self): + return "Failed to Login for URL: (%s) with username: (%s)" % (self.url, self.username) + +class AdultCheckRequired(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story requires confirmation of adult status: (%s)" % self.url + +class StoryDoesNotExist(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story does not exist: (%s)" % self.url + +class UnknownSite(Exception): + def __init__(self,url,supported_sites_list): + self.url=url + self.supported_sites_list=supported_sites_list + + def __str__(self): + return "Unknown Site(%s). Supported sites: (%s)" % (self.url, ", ".join(self.supported_sites_list)) + diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py new file mode 100644 index 00000000..e1ca7db5 --- /dev/null +++ b/fanficdownloader/html.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + +import re +import sys +import StringIO +import urllib + +from BeautifulSoup import BeautifulSoup + +class HtmlProcessor: + WHITESPACE_RE = re.compile(r'\s') + # Look for </blockquote <p> + BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE) + + def __init__(self, html, unfill=0): + self.unfill = unfill + html = self._ProcessRawHtml(html) + self._soup = BeautifulSoup(html) + if self._soup.title: + self.title = self._soup.title.contents[0] + else: + self.title = None + + def _ProcessRawHtml(self, html): + new_html, count = HtmlProcessor.BAD_TAG_RE.subn('<', html) + if count: + print >>sys.stderr, 'Replaced %d bad tags' % count + return new_html + + def _StubInternalAnchors(self): + '''Replace each internal anchor with a fixed-size filepos anchor. + + Looks for every anchor with <a href="#myanchor"> and replaces that + with <a filepos="00000000050">. 
Stores anchors in self._anchor_references''' + self._anchor_references = [] + anchor_num = 0 + # anchor links + anchorlist = self._soup.findAll('a', href=re.compile('^#')) + # treat reference tags like a tags for TOCTOP. + anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#'))) + for anchor in anchorlist: + self._anchor_references.append((anchor_num, anchor['href'])) + del anchor['href'] + anchor['filepos'] = '%.10d' % anchor_num + anchor_num += 1 + + def _ReplaceAnchorStubs(self): + # TODO: Browsers allow extra whitespace in the href names. + # use __str__ instead of prettify--it inserts extra spaces. + assembled_text = self._soup.__str__('utf8') + del self._soup # shouldn't touch this anymore + for anchor_num, original_ref in self._anchor_references: + ref = urllib.unquote(original_ref[1:]) # remove leading '#' + # Find the position of ref in the utf-8 document. + # TODO(chatham): Using regexes and looking for name= would be better. + newpos = assembled_text.rfind(ref.encode('utf-8')) + if newpos == -1: + print >>sys.stderr, 'Could not find anchor "%s"' % original_ref + continue + newpos += len(ref) + 2 # don't point into the middle of the <a name> tag + old_filepos = 'filepos="%.10d"' % anchor_num + new_filepos = 'filepos="%.10d"' % newpos + assert assembled_text.find(old_filepos) != -1 + assembled_text = assembled_text.replace(old_filepos, new_filepos, 1) + return assembled_text + + def _FixPreTags(self): + '''Replace <pre> tags with HTML-ified text.''' + pres = self._soup.findAll('pre') + for pre in pres: + pre.replaceWith(self._FixPreContents(str(pre.contents[0]))) + + def _FixPreContents(self, text): + if self.unfill: + line_splitter = '\n\n' + line_joiner = '<p>' + else: + line_splitter = '\n' + line_joiner = '<br>' + lines = [] + for line in text.split(line_splitter): + lines.append(self.WHITESPACE_RE.subn(' ', line)[0]) + return line_joiner.join(lines) + + def _RemoveUnsupported(self): + '''Remove any tags which the kindle cannot handle.''' + # TODO(chatham): <link> tags to script? + unsupported_tags = ('script', 'style') + for tag_type in unsupported_tags: + for element in self._soup.findAll(tag_type): + element.extract() + + def RenameAnchors(self, prefix): + '''Rename every internal anchor to have the given prefix, then + return the contents of the body tag.''' + for anchor in self._soup.findAll('a', href=re.compile('^#')): + anchor['href'] = '#' + prefix + anchor['href'][1:] + for a in self._soup.findAll('a'): + if a.get('name'): + a['name'] = prefix + a['name'] + + # TODO(chatham): figure out how to fix this. sometimes body comes out + # as NoneType. 
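+        # (Presumably the soup can have no <body> for fragments or badly
+        # broken markup; the guard below falls back to an empty result
+        # rather than crashing.)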
+        content = []
+        if self._soup.body is not None:
+            content = [unicode(c) for c in self._soup.body.contents]
+        return '\n'.join(content)
+
+    def CleanHtml(self):
+        # TODO(chatham): fix_html_br, fix_html
+        self._RemoveUnsupported()
+        self._StubInternalAnchors()
+        self._FixPreTags()
+        return self._ReplaceAnchorStubs()
+
+
+if __name__ == '__main__':
+    FILE = '/tmp/documentation.html'
+    #FILE = '/tmp/multipre.html'
+    FILE = '/tmp/view.html'
+    import codecs
+    d = open(FILE).read()
+    h = HtmlProcessor(d)
+    s = h.CleanHtml()
+    #print s
diff --git a/fanficdownloader/html2text.py b/fanficdownloader/html2text.py
new file mode 100644
index 00000000..ce6e1d3d
--- /dev/null
+++ b/fanficdownloader/html2text.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""html2text: Turn HTML into equivalent Markdown-structured text."""
+__version__ = "2.37"
+__author__ = "Aaron Swartz (me@aaronsw.com)"
+__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3."
+__contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"]
+
+# TODO:
+#   Support decoded entities with unifiable.
+
+if not hasattr(__builtins__, 'True'): True, False = 1, 0
+import re, sys, urllib, htmlentitydefs, codecs, StringIO, types
+import sgmllib
+import urlparse
+sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
+
+try: from textwrap import wrap
+except: pass
+
+# Use Unicode characters instead of their ascii pseudo-replacements
+UNICODE_SNOB = 0
+
+# Put the links after each paragraph instead of at the end.
+LINKS_EACH_PARAGRAPH = 0
+
+# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
+BODY_WIDTH = 78
+
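+# A quick usage sketch (the file name here is hypothetical; html2text()
+# and html2text_file() are defined at the bottom of this module):
+#
+#   import html2text
+#   html2text.BODY_WIDTH = 0   # disable re-wrapping entirely
+#   print html2text.html2text(open('story.html').read())
+
+# Don't show internal links (href="#local-anchor") -- corresponding link
+# targets won't be visible in the plain text file anyway.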
+SKIP_INTERNAL_LINKS = False + +### Entity Nonsense ### + +def name2cp(k): + if k == 'apos': return ord("'") + if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3 + return htmlentitydefs.name2codepoint[k] + else: + k = htmlentitydefs.entitydefs[k] + if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1 + return ord(codecs.latin_1_decode(k)[0]) + +unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', +'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*', +'ndash':'-', 'oelig':'oe', 'aelig':'ae', +'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', +'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', +'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i', +'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', +'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'} + +unifiable_n = {} + +for k in unifiable.keys(): + unifiable_n[name2cp(k)] = unifiable[k] + +def charref(name): + if name[0] in ['x','X']: + c = int(name[1:], 16) + else: + c = int(name) + + if not UNICODE_SNOB and c in unifiable_n.keys(): + return unifiable_n[c] + else: + return unichr(c) + +def entityref(c): + if not UNICODE_SNOB and c in unifiable.keys(): + return unifiable[c] + else: + try: name2cp(c) + except KeyError: return "&" + c + else: return unichr(name2cp(c)) + +def replaceEntities(s): + s = s.group(1) + if s[0] == "#": + return charref(s[1:]) + else: return entityref(s) + +r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") +def unescape(s): + return r_unescape.sub(replaceEntities, s) + +def fixattrs(attrs): + # Fix bug in sgmllib.py + if not attrs: return attrs + newattrs = [] + for attr in attrs: + newattrs.append((attr[0], unescape(attr[1]))) + return newattrs + +### End Entity Nonsense ### + +def onlywhite(line): + """Return true if the line does only consist of whitespace characters.""" + for c in line: + if c is not ' ' and c is not ' ': + return c is ' ' + return line + +def optwrap(text): + """Wrap all paragraphs in the provided text.""" + if not BODY_WIDTH: + return text + + assert wrap, "Requires Python 2.3." 
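+    # Behavior sketch: paragraphs beginning with ' ', '-' or '*'
+    # (indented/preformatted text and list items) are passed through
+    # unwrapped below; everything else is re-flowed to BODY_WIDTH.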
+ result = '' + newlines = 0 + for para in text.split("\n"): + if len(para) > 0: + if para[0] is not ' ' and para[0] is not '-' and para[0] is not '*': + for line in wrap(para, BODY_WIDTH): + result += line + "\n" + result += "\n" + newlines = 2 + else: + if not onlywhite(para): + result += para + "\n" + newlines = 1 + else: + if newlines < 2: + result += "\n" + newlines += 1 + return result + +def hn(tag): + if tag[0] == 'h' and len(tag) == 2: + try: + n = int(tag[1]) + if n in range(1, 10): return n + except ValueError: return 0 + +class _html2text(sgmllib.SGMLParser): + def __init__(self, out=None, baseurl=''): + sgmllib.SGMLParser.__init__(self) + + if out is None: self.out = self.outtextf + else: self.out = out + self.outtext = u'' + self.quiet = 0 + self.p_p = 0 + self.outcount = 0 + self.start = 1 + self.space = 0 + self.a = [] + self.astack = [] + self.acount = 0 + self.list = [] + self.blockquote = 0 + self.pre = 0 + self.startpre = 0 + self.lastWasNL = 0 + self.abbr_title = None # current abbreviation definition + self.abbr_data = None # last inner HTML (for abbr being defined) + self.abbr_list = {} # stack of abbreviations to write later + self.baseurl = baseurl + + def outtextf(self, s): + self.outtext += s + + def close(self): + sgmllib.SGMLParser.close(self) + + self.pbr() + self.o('', 0, 'end') + + return self.outtext + + def handle_charref(self, c): + self.o(charref(c)) + + def handle_entityref(self, c): + self.o(entityref(c)) + + def unknown_starttag(self, tag, attrs): + self.handle_tag(tag, attrs, 1) + + def unknown_endtag(self, tag): + self.handle_tag(tag, None, 0) + + def previousIndex(self, attrs): + """ returns the index of certain set of attributes (of a link) in the + self.a list + + If the set of attributes is not found, returns None + """ + if not attrs.has_key('href'): return None + + i = -1 + for a in self.a: + i += 1 + match = 0 + + if a.has_key('href') and a['href'] == attrs['href']: + if a.has_key('title') or attrs.has_key('title'): + if (a.has_key('title') and attrs.has_key('title') and + a['title'] == attrs['title']): + match = True + else: + match = True + + if match: return i + + def handle_tag(self, tag, attrs, start): + attrs = fixattrs(attrs) + + if hn(tag): + self.p() + if start: self.o(hn(tag)*"#" + ' ') + + if tag in ['p', 'div']: self.p() + + if tag == "br" and start: self.o(" \n") + + if tag == "hr" and start: + self.p() + self.o("* * *") + self.p() + + if tag in ["head", "style", 'script']: + if start: self.quiet += 1 + else: self.quiet -= 1 + + if tag in ["body"]: + self.quiet = 0 # sites like 9rules.com never close <head> + + if tag == "blockquote": + if start: + self.p(); self.o('> ', 0, 1); self.start = 1 + self.blockquote += 1 + else: + self.blockquote -= 1 + self.p() + + if tag in ['em', 'i', 'u']: self.o("_") + if tag in ['strong', 'b']: self.o("**") + if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` `` + if tag == "abbr": + if start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + + self.abbr_title = None + self.abbr_data = '' + if attrs.has_key('title'): + self.abbr_title = attrs['title'] + else: + if self.abbr_title != None: + self.abbr_list[self.abbr_data] = self.abbr_title + self.abbr_title = None + self.abbr_data = '' + + if tag == "a": + if start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): + self.astack.append(attrs) + self.o("[") + else: + self.astack.append(None) + else: + if 
self.astack: + a = self.astack.pop() + if a: + i = self.previousIndex(a) + if i is not None: + a = self.a[i] + else: + self.acount += 1 + a['count'] = self.acount + a['outcount'] = self.outcount + self.a.append(a) + self.o("][" + `a['count']` + "]") + + if tag == "img" and start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + if attrs.has_key('src'): + attrs['href'] = attrs['src'] + alt = attrs.get('alt', '') + i = self.previousIndex(attrs) + if i is not None: + attrs = self.a[i] + else: + self.acount += 1 + attrs['count'] = self.acount + attrs['outcount'] = self.outcount + self.a.append(attrs) + self.o("![") + self.o(alt) + self.o("]["+`attrs['count']`+"]") + + if tag == 'dl' and start: self.p() + if tag == 'dt' and not start: self.pbr() + if tag == 'dd' and start: self.o(' ') + if tag == 'dd' and not start: self.pbr() + + if tag in ["ol", "ul"]: + if start: + self.list.append({'name':tag, 'num':0}) + else: + if self.list: self.list.pop() + + self.p() + + if tag == 'li': + if start: + self.pbr() + if self.list: li = self.list[-1] + else: li = {'name':'ul', 'num':0} + self.o(" "*len(self.list)) #TODO: line up <ol><li>s > 9 correctly. + if li['name'] == "ul": self.o("* ") + elif li['name'] == "ol": + li['num'] += 1 + self.o(`li['num']`+". ") + self.start = 1 + else: + self.pbr() + + if tag in ["table", "tr"] and start: self.p() + if tag == 'td': self.pbr() + + if tag == "pre": + if start: + self.startpre = 1 + self.pre = 1 + else: + self.pre = 0 + self.p() + + def pbr(self): + if self.p_p == 0: self.p_p = 1 + + def p(self): self.p_p = 2 + + def o(self, data, puredata=0, force=0): + if self.abbr_data is not None: self.abbr_data += data + + if not self.quiet: + if puredata and not self.pre: + data = re.sub('\s+', ' ', data) + if data and data[0] == ' ': + self.space = 1 + data = data[1:] + if not data and not force: return + + if self.startpre: + #self.out(" :") #TODO: not output when already one there + self.startpre = 0 + + bq = (">" * self.blockquote) + if not (force and data and data[0] == ">") and self.blockquote: bq += " " + + if self.pre: + bq += " " + data = data.replace("\n", "\n"+bq) + + if self.start: + self.space = 0 + self.p_p = 0 + self.start = 0 + + if force == 'end': + # It's the end. + self.p_p = 0 + self.out("\n") + self.space = 0 + + + if self.p_p: + self.out(('\n'+bq)*self.p_p) + self.space = 0 + + if self.space: + if not self.lastWasNL: self.out(' ') + self.space = 0 + + if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"): + if force == "end": self.out("\n") + + newa = [] + for link in self.a: + if self.outcount > link['outcount']: + self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) + if link.has_key('title'): self.out(" ("+link['title']+")") + self.out("\n") + else: + newa.append(link) + + if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. 
+ + self.a = newa + + if self.abbr_list and force == "end": + for abbr, definition in self.abbr_list.items(): + self.out(" *[" + abbr + "]: " + definition + "\n") + + self.p_p = 0 + self.out(data) + self.lastWasNL = data and data[-1] == '\n' + self.outcount += 1 + + def handle_data(self, data): + if r'\/script>' in data: self.quiet -= 1 + self.o(data, 1) + + def unknown_decl(self, data): pass + +def wrapwrite(text): sys.stdout.write(text.encode('utf8')) + +def html2text_file(html, out=wrapwrite, baseurl=''): + h = _html2text(out, baseurl) + h.feed(html) + h.feed("") + return h.close() + +def html2text(html, baseurl=''): + return optwrap(html2text_file(html, None, baseurl)) + +if __name__ == "__main__": + baseurl = '' + if sys.argv[1:]: + arg = sys.argv[1] + if arg.startswith('http://'): + baseurl = arg + j = urllib.urlopen(baseurl) + try: + from feedparser import _getCharacterEncoding as enc + except ImportError: + enc = lambda x, y: ('utf-8', 1) + text = j.read() + encoding = enc(j.headers, text)[0] + if encoding == 'us-ascii': encoding = 'utf-8' + data = text.decode(encoding) + + else: + encoding = 'utf8' + if len(sys.argv) > 2: + encoding = sys.argv[2] + data = open(arg, 'r').read().decode(encoding) + else: + data = sys.stdin.read().decode('utf8') + wrapwrite(html2text(data, baseurl)) diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py new file mode 100644 index 00000000..7e91f190 --- /dev/null +++ b/fanficdownloader/htmlcleanup.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re + +def _unirepl(match): + "Return the unicode string for a decimal number" + if match.group(1)=='x': + radix=16 + else: + radix=10 + value = int(match.group(2), radix ) + return unichr(value) + +def _replaceNumberEntities(data): + p = re.compile(r'&#(x?)(\d+);') + return p.sub(_unirepl, data) + +def _replaceNotEntities(data): + # not just \w or \S. 
regexp from c:\Python25\lib\sgmllib.py
+    # (or equiv), SGMLParser, entityref
+    p = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);')
+    return p.sub(r'&amp;\1', data)
+
+def stripHTML(soup):
+    return removeAllEntities(re.sub(r'<[^>]+>','',"%s" % soup)).strip()
+
+def conditionalRemoveEntities(value):
+    if isinstance(value,str) or isinstance(value,unicode) :
+        return removeEntities(value).strip()
+    else:
+        return value
+
+def removeAllEntities(text):
+    # Remove &lt; &gt; and &amp;
+    return removeEntities(text).replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
+
+def removeEntities(text):
+
+    # replace numeric versions of [&<>] with named versions,
+    # then replace named versions with actual characters,
+
+    if text is None:
+        return ""
+    if not (isinstance(text,str) or isinstance(text,unicode)):
+        return str(text)
+
+    try:
+        t = text.decode('utf-8')
+    except UnicodeEncodeError, e:
+        try:
+            t = text.encode ('ascii', 'xmlcharrefreplace')
+        except UnicodeEncodeError, e:
+            t = text
+    text = t
+    text = re.sub(r'&#0*38;','&amp;',text)
+    text = re.sub(r'&#0*60;','&lt;',text)
+    text = re.sub(r'&#0*62;','&gt;',text)
+
+    # replace remaining &#entities; with unicode value, such as &#8217; -> ’
+    text = _replaceNumberEntities(text)
+
+    # replace several named entities with character, such as &mdash; -> -
+    # see constants.py for the list.
+    # reverse sort will put entities with ; before the same one without, when valid.
+    for e in reversed(sorted(entities.keys())):
+        v = entities[e]
+        try:
+            text = text.replace(e, v)
+        except UnicodeDecodeError, ex:
+            # for the pound symbol in constants.py
+            text = text.replace(e, v.decode('utf-8'))
+
+    # SGMLParser, and in turn, BeautifulStoneSoup doesn't parse
+    # entities terribly well and inserts (;) after something that
+    # it thinks might be an entity. AT&T becomes AT&T; All of my
+    # attempts to fix this by changing the input to
+    # BeautifulStoneSoup break something else instead. But at
+    # this point, there should be *no* real entities left, so find
+    # these not-entities and removing them here should be safe.
+    text = _replaceNotEntities(text)
+
+    # &lt; &gt; and &amp; are the only html entities allowed in xhtml, put those back.
+    return text.replace('&', '&amp;').replace('&amp;lt', '&lt;').replace('&amp;gt', '&gt;')
+
+# entity list from http://code.google.com/p/doctype/wiki/CharacterEntitiesConsistent
+entities = { '&aacute;' : 'á',
+             '&Aacute;' : 'Á',
+             '&Aacute' : 'Á',
+             '&aacute' : 'á',
+             '&acirc;' : 'â',
+             '&Acirc;' : 'Â',
+             '&Acirc' : 'Â',
+             '&acirc' : 'â',
+             '&acute;' : '´',
+             '&acute' : '´',
+             '&AElig;' : 'Æ',
+             '&aelig;' : 'æ',
+             '&AElig' : 'Æ',
+             '&aelig' : 'æ',
+             '&agrave;' : 'à',
+             '&Agrave;' : 'À',
+             '&Agrave' : 'À',
+             '&agrave' : 'à',
+             '&alefsym;' : 'ℵ',
+             '&alpha;' : 'α',
+             '&Alpha;' : 'Α',
+             '&amp;' : '&',
+             '&AMP;' : '&',
+             '&AMP' : '&',
+             '&amp' : '&',
+             '&and;' : '∧',
+             '&ang;' : '∠',
+             '&aring;' : 'å',
+             '&Aring;' : 'Å',
+             '&Aring' : 'Å',
+             '&aring' : 'å',
+             '&asymp;' : '≈',
+             '&atilde;' : 'ã',
+             '&Atilde;' : 'Ã',
+             '&Atilde' : 'Ã',
+             '&atilde' : 'ã',
+             '&auml;' : 'ä',
+             '&Auml;' : 'Ä',
+             '&Auml' : 'Ä',
+             '&auml' : 'ä',
+             '&bdquo;' : '„',
+             '&beta;' : 'β',
+             '&Beta;' : 'Β',
+             '&brvbar;' : '¦',
+             '&brvbar' : '¦',
+             '&bull;' : '•',
+             '&cap;' : '∩',
+             '&ccedil;' : 'ç',
+             '&Ccedil;' : 'Ç',
+             '&Ccedil' : 'Ç',
+             '&ccedil' : 'ç',
+             '&cedil;' : '¸',
+             '&cedil' : '¸',
+             '&cent;' : '¢',
+             '&cent' : '¢',
+             '&chi;' : 'χ',
+             '&Chi;' : 'Χ',
+             '&circ;' : 'ˆ',
+             '&clubs;' : '♣',
+             '&cong;' : '≅',
+             '&copy;' : '©',
+             '&COPY;' : '©',
+             '&COPY' : '©',
+             '&copy' : '©',
+             '&crarr;' : '↵',
+             '&cup;' : '∪',
+             '&curren;' : '¤',
+             '&curren' : '¤',
+             '&dagger;' : '†',
+             '&Dagger;' : '‡',
+             '&darr;' : '↓',
+             '&dArr;' : '⇓',
+             '&deg;' : '°',
+             '&deg' : '°',
+             '&delta;' : 'δ',
+             '&Delta;' : 'Δ',
+             '&diams;' : '♦',
+             '&divide;' : '÷',
+             '&divide' : '÷',
+             '&eacute;' : 'é',
+             '&Eacute;' : 'É',
+             '&Eacute' : 'É',
+             '&eacute' : 'é',
+             '&ecirc;' : 'ê',
+             '&Ecirc;' : 'Ê',
+             '&Ecirc' : 'Ê',
+             '&ecirc' : 'ê',
+             '&egrave;' : 'è',
+             '&Egrave;' : 'È',
+             '&Egrave' : 'È',
+             '&egrave' : 'è',
+             '&empty;' : '∅',
+             '&emsp;' : ' ',
+             '&ensp;' : ' ',
+             '&epsilon;' : 'ε',
+             '&Epsilon;' : 'Ε',
+             '&equiv;' : '≡',
+             '&eta;' : 'η',
+             '&Eta;' : 'Η',
+             '&eth;' : 'ð',
+             '&ETH;' : 'Ð',
+             '&ETH' : 'Ð',
+             '&eth' : 'ð',
+             '&euml;' : 'ë',
+             '&Euml;' : 'Ë',
+             '&Euml' : 'Ë',
+             '&euml' : 'ë',
+             '&euro;' : '€',
+             '&exist;' : '∃',
+             '&fnof;' : 'ƒ',
+             '&forall;' : '∀',
+             '&frac12;' : '½',
+             '&frac12' : '½',
+             '&frac14;' : '¼',
+             '&frac14' : '¼',
+             '&frac34;' : '¾',
+             '&frac34' : '¾',
+             '&frasl;' : '⁄',
+             '&gamma;' : 'γ',
+             '&Gamma;' : 'Γ',
+             '&ge;' : '≥',
+             #'&gt;' : '>',
+             #'&GT;' : '>',
+             #'&GT' : '>',
+             #'&gt' : '>',
+             '&harr;' : '↔',
+             '&hArr;' : '⇔',
+             '&hearts;' : '♥',
+             '&hellip;' : '…',
+             '&iacute;' : 'í',
+             '&Iacute;' : 'Í',
+             '&Iacute' : 'Í',
+             '&iacute' : 'í',
+             '&icirc;' : 'î',
+             '&Icirc;' : 'Î',
+             '&Icirc' : 'Î',
+             '&icirc' : 'î',
+             '&iexcl;' : '¡',
+             '&iexcl' : '¡',
+             '&igrave;' : 'ì',
+             '&Igrave;' : 'Ì',
+             '&Igrave' : 'Ì',
+             '&igrave' : 'ì',
+             '&image;' : 'ℑ',
+             '&infin;' : '∞',
+             '&int;' : '∫',
+             '&iota;' : 'ι',
+             '&Iota;' : 'Ι',
+             '&iquest;' : '¿',
+             '&iquest' : '¿',
+             '&isin;' : '∈',
+             '&iuml;' : 'ï',
+             '&Iuml;' : 'Ï',
+             '&Iuml' : 'Ï',
+             '&iuml' : 'ï',
+             '&kappa;' : 'κ',
+             '&Kappa;' : 'Κ',
+             '&lambda;' : 'λ',
+             '&Lambda;' : 'Λ',
+             '&laquo;' : '«',
+             '&laquo' : '«',
+             '&larr;' : '←',
+             '&lArr;' : '⇐',
+             '&lceil;' : '⌈',
+             '&ldquo;' : '“',
+             '&le;' : '≤',
+             '&lfloor;' : '⌊',
+             '&lowast;' : '∗',
+             '&loz;' : '◊',
+             '&lrm;' : '‎',
+             '&lsaquo;' : '‹',
+             '&lsquo;' : '‘',
+             #'&lt;' : '<',
+             #'&LT;' : '<',
+             #'&LT' : '<',
+             #'&lt' : '<',
+             '&macr;' : '¯',
+             '&macr' : '¯',
+             '&mdash;' : '—',
+             '&micro;' : 'µ',
+             '&micro' : 'µ',
+             '&middot;' : '·',
+             '&middot' : '·',
+             '&minus;' : '−',
+             '&mu;' : 'μ',
+             '&Mu;' : 'Μ',
+             '&nabla;' : '∇',
+             '&nbsp;' : ' ',
+             '&nbsp' : ' ',
+             '&ndash;' : '–',
+             '&ne;' : '≠',
+             '&ni;' : '∋',
+             '&not;' : '¬',
+             '&not' : '¬',
+             '&notin;' : '∉',
+             '&nsub;' : '⊄',
+             '&ntilde;' : 'ñ',
+             '&Ntilde;' : 'Ñ',
+             '&Ntilde' : 'Ñ',
+             '&ntilde' : 'ñ',
+             '&nu;' : 'ν',
+             '&Nu;' : 'Ν',
+             '&oacute;' : 'ó',
+             '&Oacute;' : 'Ó',
+             '&Oacute' : 'Ó',
+             '&oacute' : 'ó',
+             '&ocirc;' : 'ô',
+             '&Ocirc;' : 'Ô',
+             '&Ocirc' : 'Ô',
+             '&ocirc' : 'ô',
+             '&OElig;' : 'Œ',
+             '&oelig;' : 'œ',
+             '&ograve;' : 'ò',
+             '&Ograve;' : 'Ò',
+             '&Ograve' : 'Ò',
+             '&ograve' : 'ò',
+             '&oline;' : '‾',
+             '&omega;' : 'ω',
+             '&Omega;' : 'Ω',
+             '&omicron;' : 'ο',
+             '&Omicron;' : 'Ο',
+             '&oplus;' : '⊕',
+             '&or;' : '∨',
+             '&ordf;' : 'ª',
+             '&ordf' : 'ª',
+             '&ordm;' : 'º',
+             '&ordm' : 'º',
+             '&oslash;' : 'ø',
+             '&Oslash;' : 'Ø',
+             '&Oslash' : 'Ø',
+             '&oslash' : 'ø',
+             '&otilde;' : 'õ',
+             '&Otilde;' : 'Õ',
+             '&Otilde' : 'Õ',
+             '&otilde' : 'õ',
+             '&otimes;' : '⊗',
+             '&ouml;' : 'ö',
+             '&Ouml;' : 'Ö',
+             '&Ouml' : 'Ö',
+             '&ouml' : 'ö',
+             '&para;' : '¶',
+             '&para' : '¶',
+             '&part;' : '∂',
+             '&permil;' : '‰',
+             '&perp;' : '⊥',
+             '&phi;' : 'φ',
+             '&Phi;' : 'Φ',
+             '&pi;' : 'π',
+             '&Pi;' : 'Π',
+             '&piv;' : 'ϖ',
+             '&plusmn;' : '±',
+             '&plusmn' : '±',
+             '&pound;' : '£',
+             '&pound' : '£',
+             '&prime;' : '′',
+             '&Prime;' : '″',
+             '&prod;' : '∏',
+             '&prop;' : '∝',
+             '&psi;' : 'ψ',
+             '&Psi;' : 'Ψ',
+             '&quot;' : '"',
+             '&QUOT;' : '"',
+             '&QUOT' : '"',
+             '&quot' : '"',
+             '&radic;' : '√',
+             '&raquo;' : '»',
+             '&raquo' : '»',
+             '&rarr;' : '→',
+             '&rArr;' : '⇒',
+             '&rceil;' : '⌉',
+             '&rdquo;' : '”',
+             '&real;' : 'ℜ',
+             '&reg;' : '®',
+             '&REG;' : '®',
+             '&REG' : '®',
+             '&reg' : '®',
+             '&rfloor;' : '⌋',
+             '&rho;' : 'ρ',
+             '&Rho;' : 'Ρ',
+             '&rlm;' : '‏',
+             '&rsaquo;' : '›',
+             '&rsquo;' : '’',
+             '&sbquo;' : '‚',
+             '&scaron;' : 'š',
+             '&Scaron;' : 'Š',
+             '&sdot;' : '⋅',
+             '&sect;' : '§',
+             '&sect' : '§',
+             '&shy;' : '­', # strange optional hyphenation control character, not just a dash
+             '&shy' : '­',
+             '&sigma;' : 'σ',
+             '&Sigma;' : 'Σ',
+             '&sigmaf;' : 'ς',
+             '&sim;' : '∼',
+             '&spades;' : '♠',
+             '&sub;' : '⊂',
+             '&sube;' : '⊆',
+             '&sum;' : '∑',
+             '&sup1;' : '¹',
+             '&sup1' : '¹',
+             '&sup2;' : '²',
+             '&sup2' : '²',
+             '&sup3;' : '³',
+             '&sup3' : '³',
+             '&sup;' : '⊃',
+             '&supe;' : '⊇',
+             '&szlig;' : 'ß',
+             '&szlig' : 'ß',
+             '&tau;' : 'τ',
+             '&Tau;' : 'Τ',
+             '&there4;' : '∴',
+             '&theta;' : 'θ',
+             '&Theta;' : 'Θ',
+             '&thetasym;' : 'ϑ',
+             '&thinsp;' : ' ',
+             '&thorn;' : 'þ',
+             '&THORN;' : 'Þ',
+             '&THORN' : 'Þ',
+             '&thorn' : 'þ',
+             '&tilde;' : '˜',
+             '&times;' : '×',
+             '&times' : '×',
+             '&trade;' : '™',
+             '&uacute;' : 'ú',
+             '&Uacute;' : 'Ú',
+             '&Uacute' : 'Ú',
+             '&uacute' : 'ú',
+             '&uarr;' : '↑',
+             '&uArr;' : '⇑',
+             '&ucirc;' : 'û',
+             '&Ucirc;' : 'Û',
+             '&Ucirc' : 'Û',
+             '&ucirc' : 'û',
+             '&ugrave;' : 'ù',
+             '&Ugrave;' : 'Ù',
+             '&Ugrave' : 'Ù',
+             '&ugrave' : 'ù',
+             '&uml;' : '¨',
+             '&uml' : '¨',
+             '&upsih;' : 'ϒ',
+             '&upsilon;' : 'υ',
+             '&Upsilon;' : 'Υ',
+             '&uuml;' : 'ü',
+             '&Uuml;' : 'Ü',
+             '&Uuml' : 'Ü',
+             '&uuml' : 'ü',
+             '&weierp;' : '℘',
+             '&xi;' : 'ξ',
+             '&Xi;' : 'Ξ',
+             '&yacute;' : 'ý',
+             '&Yacute;' : 'Ý',
+             '&Yacute' : 'Ý',
+             '&yacute' : 'ý',
+             '&yen;' : '¥',
+             '&yen' : '¥',
+             '&yuml;' : 'ÿ',
+             '&Yuml;' : 'Ÿ',
+             '&yuml' : 'ÿ',
+             '&zeta;' : 'ζ',
+             '&Zeta;' : 'Ζ',
+             '&zwj;' : '‍', # strange spacing control character, not just a space
+             '&zwnj;' : '‌', # strange spacing control character, not just a space
+             }
diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py
new file mode 100644
index 00000000..4748e202
--- /dev/null
+++ b/fanficdownloader/mobi.py
@@ -0,0 +1,384 @@
+#!/usr/bin/python
+# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan
+
+
+import StringIO
+import struct
+import time
+import random
+import logging
+
+from html import HtmlProcessor
+
+# http://wiki.mobileread.com/wiki/MOBI
+# http://membres.lycos.fr/microfirst/palm/pdb.html
+
+encoding = {
+    'UTF-8' : 65001,
+    'latin-1' : 1252,
+}
+
+languages = {"en-us" : 0x0409,
+             "sv" : 0x041d,
+             "fi" : 0x000b,
+             "en" : 0x0009,
+             "en-gb" : 0x0809}
+
+def ToHex(s):
+    v = ['%.2x' % ord(c) for c in s]
+    return ' '.join(v)
+
+class _SubEntry:
+    def __init__(self, pos, html_data):
+        self.pos = pos
+        self.html = HtmlProcessor(html_data)
+        self.title = self.html.title
+        self._name = 'mobi_article_%d' % pos
+        if not self.title:
+            self.title = 'Article %d' % self.pos
+
+    def TocLink(self):
+        return '<a href="#%s_MOBI_START">%.80s</a>' % (self._name, self.title)
+
+    def Anchor(self):
+        return '<a name="%s_MOBI_START">' % self._name
+
+    def Body(self):
+        return self.html.RenameAnchors(self._name + '_')
+
+class Converter:
+    def __init__(self, refresh_url='', title='Unknown', author='Unknown', publisher='Unknown'):
+        self._header = Header()
+        self._header.SetTitle(title)
+        self._header.SetAuthor(author)
+        self._header.SetPublisher(publisher)
+        self._refresh_url = refresh_url
+
+    def ConvertString(self, s):
+        out = StringIO.StringIO()
+        self._ConvertStringToFile(s, out)
+        return out.getvalue()
+
+    def ConvertStrings(self, html_strs):
+        out = StringIO.StringIO()
+        self._ConvertStringsToFile(html_strs, out)
+        return out.getvalue()
+
+    def ConvertFile(self, html_file, out_file):
+        self._ConvertStringToFile(open(html_file,'rb').read(),
+                                  open(out_file, 'wb'))
+
+    def ConvertFiles(self, html_files, out_file):
+        html_strs = [open(f,'rb').read() for f in html_files]
+        self._ConvertStringsToFile(html_strs, open(out_file, 'wb'))
+
+    def MakeOneHTML(self,
html_strs): + """This takes a list of HTML strings and returns a big HTML file with + all contents consolidated. It constructs a table of contents and adds + anchors within the text + """ + title_html = [] + toc_html = [] + body_html = [] + + PAGE_BREAK = '<mbp:pagebreak>' + + # pull out the title page, assumed first html_strs. + htmltitle = html_strs[0] + entrytitle = _SubEntry(1, htmltitle) + title_html.append(entrytitle.Body()) + + title_html.append(PAGE_BREAK) + toc_html.append('<a name="TOCTOP"><h3>Table of Contents</h3><br />') + + for pos, html in enumerate(html_strs[1:]): + entry = _SubEntry(pos+1, html) + toc_html.append('%s<br />' % entry.TocLink()) + + # give some space between bodies of work. + body_html.append(PAGE_BREAK) + + body_html.append(entry.Anchor()) + + body_html.append(entry.Body()) + + # TODO: this title can get way too long with RSS feeds. Not sure how to fix + # cheat slightly and use the <a href> code to set filepos in references. + header = '''<html> +<head> +<title>Bibliorize %s GMT + + + + + +''' % time.ctime(time.time()) + + footer = '' + all_html = header + '\n'.join(title_html + toc_html + body_html) + footer + #print "%s" % all_html.encode('utf8') + return all_html + + def _ConvertStringsToFile(self, html_strs, out_file): + try: + tmp = self.MakeOneHTML(html_strs) + self._ConvertStringToFile(tmp, out_file) + except Exception, e: + logging.error('Error %s', e) + logging.debug('Details: %s' % html_strs) + + def _ConvertStringToFile(self, html_data, out): + html = HtmlProcessor(html_data) + data = html.CleanHtml() + + # collect offsets of '' tags, use to make index list. + # indexlist = [] # list of (offset,length) tuples. + # not in current use. + + # j=0 + # lastj=0 + # while True: + # j=data.find('',lastj+10) # plus a bit so we find the next. + # if j < 0: + # break + # indexlist.append((lastj,j-lastj)) + # print "index offset: %d length: %d" % (lastj,j-lastj) + # lastj=j + + records = [] +# title = html.title +# if title: +# self._header.SetTitle(title) + record_id = 1 + for start_pos in range(0, len(data), Record.MAX_SIZE): + end = min(len(data), start_pos + Record.MAX_SIZE) + record_data = data[start_pos:end] + records.append(self._header.AddRecord(record_data, record_id)) + #print "HTML Record %03d: (size:%d) [[%s ... %s]]" % ( record_id, len(record_data), record_data[:20], record_data[-20:] ) + record_id += 1 + self._header.SetImageRecordIndex(record_id) + records[0:0] = [self._header.MobiHeader()] + + header, rec_offset = self._header.PDBHeader(len(records)) + out.write(header) + for record in records: + record.WriteHeader(out, rec_offset) + #print "rec_offset: %d len(record.data): %d" % (rec_offset,len(record.data)) + rec_offset += (len(record.data)+1) # plus one for trailing null + + # Write to nuls for some reason + out.write('\0\0') + for record in records: + record.WriteData(out) + out.write('\0') + # needs a trailing null, I believe it indicates zero length 'overlap'. + # otherwise, the readers eat the last char of each html record. + # Calibre writes another 6-7 bytes of stuff after that, but we seem + # to be getting along without it. + +class Record: + MAX_SIZE = 4096 + INDEX_LEN = 8 + _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader + + # TODO(chatham): Record compression doesn't look that hard. 
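+    # Each PDB record index entry written by WriteHeader below is
+    # INDEX_LEN (8) bytes: a 4-byte data offset, a 1-byte attribute
+    # field, and a 3-byte unique id (packed as a zero byte plus a
+    # 2-byte record id -- see the '>IbbH' format).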
+ + def __init__(self, data, record_id): + assert len(data) <= self.MAX_SIZE + self.data = data + if record_id != 0: + self._id = record_id + else: + Record._unique_id_seed += 1 + self._id = 0 + + def __repr__(self): + return 'Record: id=%d len=%d' % (self._id, len(self.data)) + + def _SetUniqueId(self): + Record._unique_id_seed += 1 + # TODO(chatham): Wraparound crap + self._id = Record._unique_id_seed + + def WriteData(self, out): + out.write(self.data) + + def WriteHeader(self, out, rec_offset): + attributes = 64 # dirty? + header = struct.pack('>IbbH', + rec_offset, + attributes, + 0, self._id) + assert len(header) == Record.INDEX_LEN + out.write(header) + +EXTH_HEADER_FIELDS = { + 'author' : 100, + 'publisher' : 101, +} + +class Header: + EPOCH_1904 = 2082844800 + + def __init__(self): + self._length = 0 + self._record_count = 0 + self._title = '2008_2_34' + self._author = 'Unknown author' + self._publisher = 'Unknown publisher' + self._first_image_index = 0 + + def SetAuthor(self, author): + self._author = author.encode('ascii','ignore') + + def SetTitle(self, title): + # TODO(chatham): Reevaluate whether this needs to be ASCII. + # maybe just do sys.setdefaultencoding('utf-8')? Problems + # appending self._title with other things. + self._title = title.encode('ascii','ignore') + + def SetPublisher(self, publisher): + self._publisher = publisher.encode('ascii','ignore') + + def AddRecord(self, data, record_id): + self.max_record_size = max(Record.MAX_SIZE, len(data)) + self._record_count += 1 + self._length += len(data) + return Record(data, record_id) + + def _ReplaceWord(self, data, pos, word): + return data[:pos] + struct.pack('>I', word) + data[pos+4:] + + def PalmDocHeader(self): + compression = 1 # no compression + unused = 0 + encryption_type = 0 # no ecryption + records = self._record_count + 1 # the header record itself + palmdoc_header = struct.pack('>HHIHHHH', + compression, + unused, + self._length, + records, + Record.MAX_SIZE, + encryption_type, + unused) + assert len(palmdoc_header) == 16 + return palmdoc_header + + def PDBHeader(self, num_records): + HEADER_LEN = 32+2+2+9*4 + RECORD_INDEX_HEADER_LEN = 6 + RESOURCE_INDEX_LEN = 10 + + index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN + rec_offset = HEADER_LEN + index_len + 2 + + short_title = self._title[0:31] + attributes = 0 + version = 0 + ctime = self.EPOCH_1904 + int(time.time()) + mtime = self.EPOCH_1904 + int(time.time()) + backup_time = self.EPOCH_1904 + int(time.time()) + modnum = 0 + appinfo_offset = 0 + sort_offset = 0 + type = 'BOOK' + creator = 'MOBI' + id_seed = 36 + header = struct.pack('>32sHHII', + short_title, attributes, version, + ctime, mtime) + header += struct.pack('>IIII', backup_time, modnum, + appinfo_offset, sort_offset) + header += struct.pack('>4s4sI', + type, creator, id_seed) + next_record = 0 # not used? + header += struct.pack('>IH', next_record, num_records) + return header, rec_offset + + def _GetExthHeader(self): + # They set author, publisher, coveroffset, thumboffset + data = {'author' : self._author, + 'publisher' : self._publisher, + } + # Turn string type names into EXTH typeids. 
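+ ## Layout of each EXTH record packed below (big-endian); the worked
+ ## example is illustrative:
+ ##   bytes 0..3   type id (100 = author, 101 = publisher,
+ ##                per EXTH_HEADER_FIELDS above)
+ ##   bytes 4..7   record length = len(payload) + 8 header bytes
+ ##   bytes 8..    payload (the raw value string)
+ ## e.g. an author value of 'Bob' becomes
+ ##   struct.pack('>LL', 100, 11) + 'Bob'   # 11 == len('Bob') + 8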
+ r = [] + for key, value in data.items(): + typeid = EXTH_HEADER_FIELDS[key] + length_encoding_len = 8 + r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value) + content = ''.join(r) + + # Pad to word boundary + while len(content) % 4: + content += '\0' + TODO_mysterious = 12 + exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content + return exth + + def SetImageRecordIndex(self, idx): + self._first_image_index = idx + + def MobiHeader(self): + exth_header = self._GetExthHeader(); + palmdoc_header = self.PalmDocHeader() + + fs = 0xffffffff + + # Record 0 + header_len = 0xE4 # TODO + mobi_type = 2 # BOOK + text_encoding = encoding['UTF-8'] + unique_id = random.randint(1, 1<<32) + creator_version = 4 + reserved = '%c' % 0xff * 40 + nonbook_index = fs + full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header + language = languages['en-us'] + unused = 0 + mobi_header = struct.pack('>4sIIIII40sIIIIII', + 'MOBI', + header_len, + mobi_type, + text_encoding, + unique_id, + creator_version, + reserved, + nonbook_index, + full_name_offset, + len(self._title), + language, + fs, fs) + assert len(mobi_header) == 104 - 16 + + unknown_fields = chr(0) * 32 + drm_offset = 0 + drm_count = 0 + drm_size = 0 + drm_flags = 0 + exth_flags = 0x50 + header_end = chr(0) * 64 + mobi_header += struct.pack('>IIIIIII', + creator_version, + self._first_image_index, + fs, + unused, + fs, + unused, + exth_flags) + mobi_header += '\0' * 112 # TODO: Why this much padding? + # Set some magic offsets to be 0xFFFFFFF. + for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc): + mobi_header = self._ReplaceWord(mobi_header, pos, fs) + + # 16 bytes? + padding = '\0' * 48 * 4 # why? + total_header = palmdoc_header + mobi_header + exth_header + self._title + padding + + return self.AddRecord(total_header, 0) + +if __name__ == '__main__': + import sys + m = Converter(title='Testing Mobi', author='Mobi Author', publisher='mobi converter') + m.ConvertFiles(sys.argv[1:], 'test.mobi') + #m.ConvertFile(sys.argv[1], 'test.mobi') diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py new file mode 100644 index 00000000..ca48cde7 --- /dev/null +++ b/fanficdownloader/story.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from htmlcleanup import conditionalRemoveEntities + +class Story: + + def __init__(self): + try: + self.metadata = {'version':os.environ['CURRENT_VERSION_ID']} + except: + self.metadata = {'version':'4.0'} + self.chapters = [] # chapters will be tuples of (title,html) + self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists. 
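+ ## Doctest-style sketch of how this container behaves, relying on the
+ ## methods defined below (values are made up):
+ ##   >>> s = Story()
+ ##   >>> s.setMetadata('numWords', '12345')
+ ##   >>> s.getMetadata('numWords')      # numWords gets comma grouping
+ ##   '12,345'
+ ##   >>> s.addToList('genre', 'Drama')
+ ##   >>> s.addToList('genre', 'Drama')  # duplicates are dropped
+ ##   >>> s.getMetadata('genre')         # listables join with ', '
+ ##   'Drama'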
+ + def setMetadata(self, key, value): + self.metadata[key]=conditionalRemoveEntities(value) + + def getMetadataRaw(self,key): + if self.metadata.has_key(key): + return self.metadata[key] + + def getMetadata(self, key): + if self.getLists().has_key(key): + return ', '.join(self.getList(key)) + if self.metadata.has_key(key): + value = self.metadata[key] + if value: + if key == "numWords": + value = commaGroups(value) + if key == "dateCreated": + value = value.strftime("%Y-%m-%d %H:%M:%S") + if key == "datePublished" or key == "dateUpdated": + value = value.strftime("%Y-%m-%d") + return value + + def addToList(self,listname,value): + if value==None: + return + if not self.listables.has_key(listname): + self.listables[listname]=[] + # prevent duplicates. + if not value in self.listables[listname]: + self.listables[listname].append(conditionalRemoveEntities(value)) + + def getList(self,listname): + if not self.listables.has_key(listname): + return [] + return self.listables[listname] + + def getLists(self): + return self.listables + + def addChapter(self, title, html): + self.chapters.append( (title,html) ) + + def getChapters(self): + "Chapters will be tuples of (title,html)" + return self.chapters + + def __str__(self): + return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters) + +def commaGroups(s): + groups = [] + while s and s[-1].isdigit(): + groups.append(s[-3:]) + s = s[:-3] + return s + ','.join(reversed(groups)) + diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py new file mode 100644 index 00000000..7a3f7032 --- /dev/null +++ b/fanficdownloader/writers/__init__.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## This could (should?) use a dynamic loader like adapters, but for +## now, it's static, since there's so few of them. + +from fanficdownloader.exceptions import FailedToDownload + +from writer_html import HTMLWriter +from writer_txt import TextWriter +from writer_epub import EpubWriter +from writer_mobi import MobiWriter + +def getWriter(type,config,story): + if type == "html": + return HTMLWriter(config,story) + if type == "txt": + return TextWriter(config,story) + if type == "epub": + return EpubWriter(config,story) + if type == "mobi": + return MobiWriter(config,story) + + raise FailedToDownload("(%s) is not a supported download format."%type) diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py new file mode 100644 index 00000000..411c2e4d --- /dev/null +++ b/fanficdownloader/writers/base_writer.py @@ -0,0 +1,248 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import os.path +import datetime +import string +import StringIO +import zipfile +from zipfile import ZipFile, ZIP_DEFLATED +import logging + +from fanficdownloader.configurable import Configurable +from fanficdownloader.htmlcleanup import removeEntities, removeAllEntities, stripHTML + +class BaseStoryWriter(Configurable): + + @staticmethod + def getFormatName(): + return 'base' + + @staticmethod + def getFormatExt(): + return '.bse' + + def __init__(self, config, adapter): + Configurable.__init__(self, config) + self.addConfigSection(adapter.getSiteDomain()) + self.addConfigSection(self.getFormatName()) + self.addConfigSection(adapter.getSiteDomain()+":"+self.getFormatName()) + self.addConfigSection("overrides") + + self.adapter = adapter + self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially. + self.validEntries = [ + 'category', + 'genre', + 'characters', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'warnings', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'authorId', + 'extratags', + 'title', + 'storyUrl', + 'description', + 'author', + 'authorUrl', + 'formatname', + 'formatext', + 'siteabbrev', + 'version'] + + # fall back labels. + self.titleLabels = { + 'category':'Category', + 'genre':'Genre', + 'status':'Status', + 'characters':'Characters', + 'datePublished':'Published', + 'dateUpdated':'Updated', + 'dateCreated':'Packaged', + 'rating':'Rating', + 'warnings':'Warnings', + 'numChapters':'Chapters', + 'numWords':'Words', + 'site':'Site', + 'storyId':'Story ID', + 'authorId':'Author ID', + 'extratags':'Extra Tags', + 'title':'Title', + 'storyUrl':'Story URL', + 'description':'Summary', + 'author':'Author', + 'authorUrl':'Author URL', + 'formatname':'File Format', + 'formatext':'File Extension', + 'siteabbrev':'Site Abbrev', + 'version':'FFD Version' + } + self.story.setMetadata('formatname',self.getFormatName()) + self.story.setMetadata('formatext',self.getFormatExt()) + + def getOutputFileName(self): + if self.getConfig('zip_output'): + return self.getZipFileName() + else: + return self.getBaseFileName() + + def getBaseFileName(self): + return self.formatFileName(self.getConfig('output_filename')) + + def getZipFileName(self): + return self.formatFileName(self.getConfig('zip_filename')) + + def formatFileName(self,template): + values = self.story.metadata + # fall back default: + if not template: + template="${title}-${siteabbrev}_${storyId}${formatext}" + + if not self.getConfig('allow_unsafe_filename'): + values={} + pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+") + for k in self.story.metadata.keys(): + values[k]=re.sub(pattern,'_', removeAllEntities(self.story.getMetadata(k))) + + return string.Template(template).substitute(values).encode('utf8') + + def _write(self, out, text): + out.write(text.encode('utf8')) + + def writeTitlePage(self, out, START, ENTRY, END, WIDE_ENTRY=None): + """ + Write the title page, but only include entries that there's + metadata for. START, ENTRY and END are expected to already by + string.Template(). 
START and END are expected to use the same + names as Story.metadata, but ENTRY should use label and value. + """ + if self.getConfig("include_titlepage"): + self._write(out,START.substitute(self.story.metadata)) + + if WIDE_ENTRY==None: + WIDE_ENTRY=ENTRY + + titleEntriesList = self.getConfigList("titlepage_entries") + wideTitleEntriesList = self.getConfigList("wide_titlepage_entries") + + for entry in titleEntriesList: + if entry in self.validEntries: + if self.story.getMetadata(entry): + if entry in wideTitleEntriesList: + TEMPLATE=WIDE_ENTRY + else: + TEMPLATE=ENTRY + if self.getConfigList(entry): + label=self.getConfig(entry+"_label") + else: + label=self.titleLabels[entry] + self._write(out,TEMPLATE.substitute({'label':label, + 'value':self.story.getMetadata(entry)})) + + self._write(out,END.substitute(self.story.metadata)) + + def writeTOCPage(self, out, START, ENTRY, END): + """ + Write the Table of Contents page. START, ENTRY and END are expected to already by + string.Template(). START and END are expected to use the same + names as Story.metadata, but ENTRY should use index and chapter. + """ + # Only do TOC if there's more than one chapter and it's configured. + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + self._write(out,START.substitute(self.story.metadata)) + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + + self._write(out,END.substitute(self.story.metadata)) + + # if no outstream is given, write to file. + def writeStory(self,outstream=None,metaonly=False): + for tag in self.getConfigList("extratags"): + self.story.addToList("extratags",tag) + + self.metaonly = metaonly + outfilename=self.getOutputFileName() + + if not outstream: + close=True + logging.debug("Save directly to file: %s" % outfilename) + if self.getConfig('make_directories'): + path="" + dirs = os.path.dirname(outfilename).split('/') + for dir in dirs: + path+=dir+"/" + if not os.path.exists(path): + os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? + + ## Check for output file date vs updated date here + if not self.getConfig('always_overwrite'): + if os.path.exists(outfilename): + ## date() truncs off time, which files have, but sites don't report. + lastupdated=self.story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.datetime.fromtimestamp(os.stat(outfilename)[8]).date() + if fileupdated > lastupdated: + print "File(%s) Updated(%s) more recently than Story(%s) - Skipping" % (outfilename,fileupdated,lastupdated) + return + if not metaonly: + self.story = self.adapter.getStory() # get full story + # now, just + # before writing. + # Fetch before + # opening file. + outstream = open(outfilename,"wb") + else: + close=False + logging.debug("Save to stream") + + if not metaonly: + self.story = self.adapter.getStory() # get full story now, + # just before + # writing. Okay if + # double called with + # above, it will only + # fetch once. + if self.getConfig('zip_output'): + out = StringIO.StringIO() + self.writeStoryImpl(out) + zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) + zipout.writestr(self.getBaseFileName(),out.getvalue()) + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. 
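+ ## For reference, the formatFileName() templating defined earlier in
+ ## this class boils down to string.Template substitution; the metadata
+ ## values in this sketch are made up:
+ ##   >>> import string
+ ##   >>> t = string.Template("${title}-${siteabbrev}_${storyId}${formatext}")
+ ##   >>> t.substitute({'title':'Some_Story', 'siteabbrev':'ffnet',
+ ##   ...               'storyId':'1234567', 'formatext':'.epub'})
+ ##   'Some_Story-ffnet_1234567.epub'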
+ for zf in zipout.filelist: + zf.create_system = 0 + zipout.close() + out.close() + else: + self.writeStoryImpl(outstream) + + if close: + outstream.close() + + def writeStoryImpl(self, out): + "Must be overriden by sub classes." + pass + diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py new file mode 100644 index 00000000..ec9b11dc --- /dev/null +++ b/fanficdownloader/writers/writer_epub.py @@ -0,0 +1,442 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO +import zipfile +from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED + +## XML isn't as forgiving as HTML, so rather than generate as strings, +## use DOM to generate the XML files. +from xml.dom.minidom import parse, parseString, getDOMImplementation + +from base_writer import * +from fanficdownloader.htmlcleanup import stripHTML + +class EpubWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'epub' + + @staticmethod + def getFormatExt(): + return '.epub' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.EPUB_CSS='''body { margin-left: 2%; margin-right: 2%; margin-top: 2%; margin-bottom: 2%; text-align: justify; } +pre { font-size: x-small; } +sml { font-size: small; } +h1 { text-align: center; } +h2 { text-align: center; } +h3 { text-align: center; } +h4 { text-align: center; } +h5 { text-align: center; } +h6 { text-align: center; } +.CI { + text-align:center; + margin-top:0px; + margin-bottom:0px; + padding:0px; + } +.center {text-align: center;} +.cover {text-align: center;} +.full {width: 100%; } +.quarter {width: 25%; } +.smcap {font-variant: small-caps;} +.u {text-decoration: underline;} +.bold {font-weight: bold;} +''' + + self.EPUB_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${author}

    +
    +''') + + self.EPUB_TITLE_ENTRY = string.Template(''' +${label}: ${value}
    +''') + + self.EPUB_TITLE_PAGE_END = string.Template(''' +
    + + + +''') + + self.EPUB_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${author}

    +
    +''') + + self.EPUB_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_TITLE_PAGE_END = string.Template(''' +
    ${label}:${value}
    ${label}: ${value}
    + + + +''') + + self.EPUB_TOC_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +
    +

    Table of Contents

    +''') + + self.EPUB_TOC_ENTRY = string.Template(''' +${chapter}
    +''') + + self.EPUB_TOC_PAGE_END = string.Template(''' +
    + + +''') + + self.EPUB_CHAPTER_START = string.Template(''' + + + +${chapter} + + + +

    ${chapter}

    +''') + + self.EPUB_CHAPTER_END = string.Template(''' + + +''') + + def getMetadata(self,key): + return stripHTML(self.story.getMetadata(key)) + + def writeStoryImpl(self, out): + + ## Python 2.5 ZipFile is rather more primative than later + ## versions. It can operate on a file, or on a StringIO, but + ## not on an open stream. OTOH, I suspect we would have had + ## problems with closing and opening again to change the + ## compression type anyway. + zipio = StringIO.StringIO() + + ## mimetype must be first file and uncompressed. Python 2.5 + ## ZipFile can't change compression type file-by-file, so we + ## have to close and re-open + outputepub = ZipFile(zipio, 'w', compression=ZIP_STORED) + outputepub.debug=3 + outputepub.writestr('mimetype','application/epub+zip') + outputepub.close() + + ## Re-open file for content. + outputepub = ZipFile(zipio, 'a', compression=ZIP_DEFLATED) + outputepub.debug=3 + + ## Create META-INF/container.xml file. The only thing it does is + ## point to content.opf + containerdom = getDOMImplementation().createDocument(None, "container", None) + containertop = containerdom.documentElement + containertop.setAttribute("version","1.0") + containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container") + rootfiles = containerdom.createElement("rootfiles") + containertop.appendChild(rootfiles) + rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf", + "media-type":"application/oebps-package+xml"})) + outputepub.writestr("META-INF/container.xml",containerdom.toxml(encoding='utf-8')) + containerdom.unlink() + del containerdom + + ## Epub has two metadata files with real data. We're putting + ## them in content.opf (pointed to by META-INF/container.xml) + ## and toc.ncx (pointed to by content.opf) + + ## content.opf contains metadata, a 'manifest' list of all + ## other included files, and another 'spine' list of the items in the + ## file + + uniqueid= 'fanficdownloader-uid:%s-u%s-s%s' % ( + self.getMetadata('site'), + self.getMetadata('authorId'), + self.getMetadata('storyId')) + + contentdom = getDOMImplementation().createDocument(None, "package", None) + package = contentdom.documentElement + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","fanficdownloader-uid") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + package.appendChild(metadata) + + metadata.appendChild(newTag(contentdom,"dc:identifier", + text=uniqueid, + attrs={"id":"fanficdownloader-uid"})) + + if self.getMetadata('title'): + metadata.appendChild(newTag(contentdom,"dc:title",text=self.getMetadata('title'))) + + if self.getMetadata('author'): + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=self.getMetadata('author'))) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="fanficdownloader [http://fanficdownloader.googlecode.com]",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="")) + metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + + # published, created, updated, calibre + # Leave calling self.story.getMetadataRaw directly in case date format changes. 
+ if self.story.getMetadataRaw('datePublished'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"publication"}, + text=self.story.getMetadataRaw('datePublished').strftime("%Y-%m-%d"))) + + if self.story.getMetadataRaw('dateCreated'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"creation"}, + text=self.story.getMetadataRaw('dateCreated').strftime("%Y-%m-%d"))) + + if self.story.getMetadataRaw('dateUpdated'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"modification"}, + text=self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%d"))) + metadata.appendChild(newTag(contentdom,"meta", + attrs={"name":"calibre:timestamp", + "content":self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%dT%H:%M:%S")})) + # Last Update tags for Bill. + self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update Year/Month: %Y/%m")) + self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update: %Y/%m/%d")) + + if self.getMetadata('description'): + metadata.appendChild(newTag(contentdom,"dc:description",text= + self.getMetadata('description'))) + + # set to avoid duplicates subject tags. + subjectset = set() + for entry in self.validEntries: + if entry in self.getConfigList("include_subject_tags") and \ + entry not in self.story.getLists() and \ + self.story.getMetadata(entry): + subjectset.add(self.getMetadata(entry)) + # listables all go into dc:subject tags, but only if they are configured. + for (name,lst) in self.story.getLists().iteritems(): + if name in self.getConfigList("include_subject_tags"): + for tag in lst: + subjectset.add(tag) + for subject in subjectset: + metadata.appendChild(newTag(contentdom,"dc:subject",text=subject)) + + + if self.getMetadata('site'): + metadata.appendChild(newTag(contentdom,"dc:publisher", + text=self.getMetadata('site'))) + + if self.getMetadata('storyUrl'): + metadata.appendChild(newTag(contentdom,"dc:identifier", + attrs={"opf:scheme":"URL"}, + text=self.getMetadata('storyUrl'))) + metadata.appendChild(newTag(contentdom,"dc:source", + text=self.getMetadata('storyUrl'))) + + ## end of metadata, create manifest. + items = [] # list of (id, href, type, title) tuples(all strings) + itemrefs = [] # list of strings -- idrefs from .opfs' spines + items.append(("ncx","toc.ncx","application/x-dtbncx+xml",None)) ## we'll generate the toc.ncx file, + ## but it needs to be in the items manifest. 
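+ ## Sketch of where these (id, href, media-type, title) tuples land in
+ ## content.opf (abbreviated; one chapter shown):
+ ##   <manifest>
+ ##     <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
+ ##     <item id="file0001" href="OEBPS/file0001.xhtml"
+ ##           media-type="application/xhtml+xml"/>
+ ##   </manifest>
+ ##   <spine toc="ncx">
+ ##     <itemref idref="file0001" linear="yes"/>
+ ##   </spine>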
+ items.append(("style","OEBPS/stylesheet.css","text/css",None)) + if self.getConfig("include_titlepage"): + items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) + itemrefs.append("title_page") + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) + itemrefs.append("toc_page") + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + i=index+1 + items.append(("file%04d"%i, + "OEBPS/file%04d.xhtml"%i, + "application/xhtml+xml", + title)) + itemrefs.append("file%04d"%i) + + manifest = contentdom.createElement("manifest") + package.appendChild(manifest) + for item in items: + (id,href,type,title)=item + manifest.appendChild(newTag(contentdom,"item", + attrs={'id':id, + 'href':href, + 'media-type':type})) + + spine = newTag(contentdom,"spine",attrs={"toc":"ncx"}) + package.appendChild(spine) + for itemref in itemrefs: + spine.appendChild(newTag(contentdom,"itemref", + attrs={"idref":itemref, + "linear":"yes"})) + # write content.opf to zip. + outputepub.writestr("content.opf",contentdom.toxml(encoding='utf-8')) + contentdom.unlink() + del contentdom + + ## create toc.ncx file + tocncxdom = getDOMImplementation().createDocument(None, "ncx", None) + ncx = tocncxdom.documentElement + ncx.setAttribute("version","2005-1") + ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/") + head = tocncxdom.createElement("head") + ncx.appendChild(head) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:uid", "content":uniqueid})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:depth", "content":"1"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:totalPageCount", "content":"0"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:maxPageNumber", "content":"0"})) + + docTitle = tocncxdom.createElement("docTitle") + docTitle.appendChild(newTag(tocncxdom,"text",text=self.getMetadata('title'))) + ncx.appendChild(docTitle) + + tocnavMap = tocncxdom.createElement("navMap") + ncx.appendChild(tocnavMap) + + # + # + # + # + # + # + index=0 + for item in items: + (id,href,type,title)=item + # only items to be skipped, toc.ncx, stylesheet.css, should have no title. + if title : + navPoint = newTag(tocncxdom,"navPoint", + attrs={'id':id, + 'playOrder':str(index)}) + tocnavMap.appendChild(navPoint) + navLabel = newTag(tocncxdom,"navLabel") + navPoint.appendChild(navLabel) + ## the xml library will re-escape as needed. + navLabel.appendChild(newTag(tocncxdom,"text",text=stripHTML(title))) + navPoint.appendChild(newTag(tocncxdom,"content",attrs={"src":href})) + index=index+1 + + # write toc.ncs to zip file + outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8')) + tocncxdom.unlink() + del tocncxdom + + # write stylesheet.css file. + outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS) + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.EPUB_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.EPUB_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.EPUB_TABLE_TITLE_WIDE_ENTRY + TITLE_PAGE_END = self.EPUB_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.EPUB_TITLE_PAGE_START + TITLE_ENTRY = self.EPUB_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.EPUB_TITLE_ENTRY # same, only wide in tables. 
+ TITLE_PAGE_END = self.EPUB_TITLE_PAGE_END + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END) + if titlepageIO.getvalue(): # will be false if no title page. + outputepub.writestr("OEBPS/title_page.xhtml",titlepageIO.getvalue()) + titlepageIO.close() + + # write toc page. + tocpageIO = StringIO.StringIO() + self.writeTOCPage(tocpageIO, + self.EPUB_TOC_PAGE_START, + self.EPUB_TOC_ENTRY, + self.EPUB_TOC_PAGE_END) + if tocpageIO.getvalue(): # will be false if no toc page. + outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue()) + tocpageIO.close() + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('
</p>','</p>\n').replace('<br />','<br />
    \n') + outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8')) + del fullhtml + + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 + outputepub.close() + out.write(zipio.getvalue()) + zipio.close() + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py new file mode 100644 index 00000000..32d27d62 --- /dev/null +++ b/fanficdownloader/writers/writer_html.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string + +from base_writer import * + +class HTMLWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'html' + + @staticmethod + def getFormatExt(): + return '.html' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.HTML_FILE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${author}

    +''') + + self.HTML_TITLE_PAGE_START = string.Template(''' + +''') + + self.HTML_TITLE_ENTRY = string.Template(''' + +''') + + self.HTML_TITLE_PAGE_END = string.Template(''' +
    ${label}:${value}
    +''') + + self.HTML_TOC_PAGE_START = string.Template(''' +

    Table of Contents

    +

    +''') + + self.HTML_TOC_ENTRY = string.Template(''' +${chapter}
    +''') + + self.HTML_TOC_PAGE_END = string.Template(''' +

    +''') + + self.HTML_CHAPTER_START = string.Template(''' +

    ${chapter}

    +''') + + self.HTML_FILE_END = string.Template(''' + +''') + + + def writeStoryImpl(self, out): + + self._write(out,self.HTML_FILE_START.substitute(self.story.metadata)) + + self.writeTitlePage(out, + self.HTML_TITLE_PAGE_START, + self.HTML_TITLE_ENTRY, + self.HTML_TITLE_PAGE_END) + + self.writeTOCPage(out, + self.HTML_TOC_PAGE_START, + self.HTML_TOC_ENTRY, + self.HTML_TOC_PAGE_END) + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + self._write(out,html) + + self._write(out,self.HTML_FILE_END.substitute(self.story.metadata)) diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py new file mode 100644 index 00000000..73b48afb --- /dev/null +++ b/fanficdownloader/writers/writer_mobi.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO + +from base_writer import * +from fanficdownloader.htmlcleanup import stripHTML +from fanficdownloader.mobi import Converter + +class MobiWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'mobi' + + @staticmethod + def getFormatExt(): + return '.mobi' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.MOBI_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${author}

    +
    +''') + + self.MOBI_TITLE_ENTRY = string.Template(''' +${label}: ${value}
    +''') + + self.MOBI_TITLE_PAGE_END = string.Template(''' +
    + + + +''') + + self.MOBI_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

    ${title} by ${author}

    + +''') + + self.MOBI_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_PAGE_END = string.Template(''' +
    ${label}:${value}
    ${label}: ${value}
    + + + +''') + + self.MOBI_TOC_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +
    +

    Table of Contents

    +''') + + self.MOBI_TOC_ENTRY = string.Template(''' +${chapter}
    +''') + + self.MOBI_TOC_PAGE_END = string.Template(''' +
    + + +''') + + self.MOBI_CHAPTER_START = string.Template(''' + + + +${chapter} + + + +

    ${chapter}

    +''') + + self.MOBI_CHAPTER_END = string.Template(''' + + +''') + + def getMetadata(self,key): + return stripHTML(self.story.getMetadata(key)) + + def writeStoryImpl(self, out): + + files = [] + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.MOBI_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TABLE_TITLE_WIDE_ENTRY + TITLE_PAGE_END = self.MOBI_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.MOBI_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TITLE_ENTRY # same, only wide in tables. + TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END) + if titlepageIO.getvalue(): # will be false if no title page. + files.append(titlepageIO.getvalue()) + titlepageIO.close() + + ## MOBI always has a TOC injected by mobi.py because there's + ## no meta-data TOC. + # # write toc page. + # tocpageIO = StringIO.StringIO() + # self.writeTOCPage(tocpageIO, + # self.MOBI_TOC_PAGE_START, + # self.MOBI_TOC_ENTRY, + # self.MOBI_TOC_PAGE_END) + # if tocpageIO.getvalue(): # will be false if no toc page. + # files.append(tocpageIO.getvalue()) + # tocpageIO.close() + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('
</p>','</p>\n').replace('<br />','<br />
    \n') + files.append(fullhtml.encode('utf-8')) + del fullhtml + + c = Converter(title=self.getMetadata('title'), + author=self.getMetadata('author'), + publisher=self.getMetadata('site')) + mobidata = c.ConvertStrings(files) + out.write(mobidata) + + del files + del mobidata + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py new file mode 100644 index 00000000..ccf46375 --- /dev/null +++ b/fanficdownloader/writers/writer_txt.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +from textwrap import wrap + +from base_writer import * + +from fanficdownloader.html2text import html2text, BODY_WIDTH + +## In BaseStoryWriter, we define _write to encode objects +## back into for true output. But txt needs to write the +## title page and TOC to a buffer first to wordwrap. And StringIO +## gets pissy about unicode bytes in its buflist. This decodes the +## unicode containing object passed in back to a +## object so they join up properly. Could override _write to not +## encode and do out.write(whatever.encode('utf8') instead. Honestly +## not sure which is uglier. +class KludgeStringIO(): + def __init__(self, buf = ''): + self.buflist=[] + def write(self,s): + try: + s=s.decode('utf-8') + except: + pass + self.buflist.append(s) + def getvalue(self): + return u''.join(self.buflist) + def close(self): + pass + +class TextWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'txt' + + @staticmethod + def getFormatExt(): + return '.txt' + + def __init__(self, config, story): + + BaseStoryWriter.__init__(self, config, story) + + self.TEXT_FILE_START = string.Template(u''' + + +${title} + +by ${author} + + +''') + + self.TEXT_TITLE_PAGE_START = string.Template(u''' +''') + + self.TEXT_TITLE_ENTRY = string.Template(u'''${label}: ${value} +''') + + self.TEXT_TITLE_PAGE_END = string.Template(u''' + + +''') + + self.TEXT_TOC_PAGE_START = string.Template(u''' + +TABLE OF CONTENTS + +''') + + self.TEXT_TOC_ENTRY = string.Template(u''' +${chapter} +''') + + self.TEXT_TOC_PAGE_END = string.Template(u''' +''') + + self.TEXT_CHAPTER_START = string.Template(u''' + +\t${chapter} + +''') + + self.TEXT_FILE_END = string.Template(u''' + +End file. 
+''') + + def writeStoryImpl(self, out): + + wrapout = KludgeStringIO() + + wrapout.write(self.TEXT_FILE_START.substitute(self.story.metadata)) + + self.writeTitlePage(wrapout, + self.TEXT_TITLE_PAGE_START, + self.TEXT_TITLE_ENTRY, + self.TEXT_TITLE_PAGE_END) + towrap = wrapout.getvalue() + + self.writeTOCPage(wrapout, + self.TEXT_TOC_PAGE_START, + self.TEXT_TOC_ENTRY, + self.TEXT_TOC_PAGE_END) + + towrap = wrapout.getvalue() + wrapout.close() + towrap = removeAllEntities(towrap) + + self._write(out,self.lineends(self.wraplines(towrap))) + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) + self._write(out,self.lineends(html2text(html))) + + self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.metadata)))) + + def wraplines(self, text): + result='' + for para in text.split("\n"): + first=True + for line in wrap(para, BODY_WIDTH): + if first: + first=False + else: + result += u"\n" + result += line + result += u"\n" + return result + + ## The appengine will return unix line endings. + def lineends(self, txt): + txt = txt.replace('\r','') + if self.getConfig("windows_eol"): + txt = txt.replace('\n',u'\r\n') + return txt + diff --git a/ffstorage.py b/ffstorage.py new file mode 100644 index 00000000..92e29d04 --- /dev/null +++ b/ffstorage.py @@ -0,0 +1,39 @@ +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from google.appengine.ext import db + +class DownloadMeta(db.Model): + user = db.UserProperty() + url = db.StringProperty() + name = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + format = db.StringProperty() + failure = db.TextProperty() + completed = db.BooleanProperty(default=False) + date = db.DateTimeProperty(auto_now_add=True) + version = db.StringProperty() + # data_chunks is implicit from DownloadData def. + +class DownloadData(db.Model): + download = db.ReferenceProperty(DownloadMeta, + collection_name='data_chunks') + blob = db.BlobProperty() + index = db.IntegerProperty() + +class UserConfig(db.Model): + user = db.UserProperty() + config = db.BlobProperty() diff --git a/index-ajax.html b/index-ajax.html new file mode 100644 index 00000000..51dc6318 --- /dev/null +++ b/index-ajax.html @@ -0,0 +1,109 @@ + + + + + + + Fanfiction Downloader (fanfiction.net, fictionalley, ficwad to epub and HTML) + + + + + + + + + +
    +

    + FanFiction Downloader +

    + + +
    +
    + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
    + +
    + Ebook format   +
    + +
    + +
    + + + +
    + + + +
    +
    + +

    + Login and Password +

    +
    + If the story requires a login and password to download (e.g. it is marked as Mature on FFA), you may need to provide your credentials to download it; otherwise just leave these fields empty +
    +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    +
    +
    + + +
    + + +
    + +
    +
    + A few things to know that will make your life substantially easier: 
      +
    1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
    2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
    3. Paste the URL of the first chapter of the fanfic, not the index page.
    4. Fics with a single chapter are not supported (you can just copy and paste the text).
    5. Stories which are too long may not be downloaded correctly and the application will report a time-out error — this is a limitation currently imposed by Google AppEngine on long-running activities.
    6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
    7. You can download fanfics and store them for 'later' by just downloading them and visiting the recent downloads section, but in the future they will be deleted after 5 days to save space.
    8. If the Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
    9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
    + Otherwise, just have fun, and if you want to say thank you — use the email above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Roman Kirillov +
    + +
    + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..fa29953d --- /dev/null +++ b/index.html @@ -0,0 +1,271 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + + + + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {{yourfile}} + + + {% if authorized %} +
    +
    +
    +

    Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites + much easier.

    +
    + +

    Please Help Test New Version

    +

    + We have a new, more efficient version of the system up + for testing. Please try the + Testing + Version here. +

    +

    New Google Quotas

    +

    + Google has changed their quota limits for free + applications using their AppEngine system, like this one. + We expect that there will be times when the + system exceeds its permitted processing quota. We're + looking at ways to make the system more efficient, but + there will be issues for a while. +

    +

    + You also have the option of running the downloader on your + own computer if you have Python available. + Download here. +

    +

    + If you have any problems with this application, please + report them in + the Fanfiction + Downloader Google Group. The + Previous + Version is also available for you to use if necessary. +

    +
    + {{ error_message }} +
    +
    + +
    +
    URL:
    +
    +
    Ebook format
    +
    + EPub + HTML + Plain Text + Mobi(Kindle) +
    +
    +
    + +

    For most readers, including Sony Reader, Nook and iPad, use EPub.

    +
    +
    +
    +

    + Customize your User Configuration. +

    +

    + Or see your personal list of previously downloaded fanfics. +

    +
    +
    + {% else %} +
    +
    +

    + This is a fan fiction downloader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to log in, so the downloader can remember your fanfics and store them. +

    +

    Login using Google account

    +
    +
    + {% endif %} + +
    +
    +
    fictionalley.org
    +
    + Use the URL of the story's chapter list, such as +
    http://www.fictionalley.org/authors/drt/DA.html. +
    Or a chapter URL (or one-shot text), such as +
    http://www.fictionalley.org/authors/drt/JOTP01a.html. +
    Both will work for both chaptered and one-shot stories now. +
    +
    fanfiction.net
    +
    + Use the URL of any story chapter, with or without story title such as +
    http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
    http://www.fanfiction.net/s/2345466/3/. +
    +
    fictionpress.com
    +
    + Use the URL of any story chapter, such as +
    http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
    http://www.fictionpress.com/s/2847338/6/. +
    +
    twilighted.net
    +
    + Use the URL of the start of the story, such as +
    http://twilighted.net/viewstory.php?sid=8422. +
    +
    twiwrite.net
    +
    + Use the URL of the start of the story, such as +
    http://twiwrite.net/viewstory.php?sid=427. +
    +
    ficwad.com
    +
    + Use the URL of the story's chapter list, such as +
    http://www.ficwad.com/story/74884. +
    Note that this is changed from the previous version. The system will still accept chapter URLs, however. +
    +
    harrypotterfanfiction.com
    +
    + Use the URL of the story's chapter list, such as +
    http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
    +
    potionsandsnitches.net
    +
    + Use the URL of the story's chapter list, such as +
    http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
    +
    mediaminer.org
    +
    + Use the URL of the story's chapter list, such as +
    http://www.mediaminer.org/fanfic/view_st.php/166653. +
    Or the story URL for one-shots, such as +
    http://www.mediaminer.org/fanfic/view_st.php/167618 or +
    http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c +
    +
    adastrafanfic.com
    +
    + Use the URL of the story's chapter list, such as +
    http://www.adastrafanfic.com/viewstory.php?sid=854. +
    +
    whofic.com
    +
    + Use the URL of the story's chapter list, such as +
    http://www.whofic.com/viewstory.php?sid=16334. +
    +
    thewriterscoffeeshop.com
    +
    + Use the URL of the story's chapter list, such as +
    http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110. +
    +
    fanfiction.tenhawkpresents.com
    +
    + Use the URL of the story's chapter list, such as +
    http://fanfiction.tenhawkpresents.com/viewstory.php?sid=294. +
    +
    fanfic.castletv.net
    +
    + Use the URL of the story's chapter list, such as +
    http://fanfic.castletv.net/viewstory.php?sid=123. +
    +
    fimfiction.net
    +
    + Use the URL of the story's chapter list, such as +
    http://www.fimfiction.net/story/123/ +
    or the URL of any chapter, such as +
    http://www.fimfiction.net/story/123/1/. +
    +
    + + + A few additional things to know, which will make your life substantially easier: +
      +
    1. + First thing to know: I do not use your Google login and password. In fact, all I know about you is your ID – the password + is verified by Google and is absolutely, totally unknown to anyone but you.
    2. + Small post written by me + — how to read fiction in Stanza or any other ebook reader.
    3. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
    4. + Downloaded stories are deleted after some time (which should give you enough time to download them and will keep + Google happy about the app not going over the storage limit).
    5. + If you see funny characters in a downloaded Plain Text file, make sure you choose the UTF-8 text file encoding and + not something else.
    6. + If you think that something that should work in fact doesn't, post a message to + our Google Group. I also encourage you to join it so + you will find out about the latest updates and fixes as soon as possible.
    + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Fanficdownloader team +
    + +
    + + +
    +
    + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..16bcaefe --- /dev/null +++ b/index.yaml @@ -0,0 +1,33 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
    " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+ all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "", "
    " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + +
    +

    + FanFiction Downloader +

    + +
    + + +
    + + {% if fic.failure %} +
    + {{ fic.failure }} +
    + {% endif %} +
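The fic.failure branch above displays whatever exception text the download handlers recorded. A condensed sketch of that error path, rearranged from FanfictionDownloader.post() in main.py further down (all names are taken from that file, not invented here):

    # Condensed from the error paths of main.py, further down in this
    # patch: any exception text is persisted on the DownloadMeta record
    # ("download" here) and then surfaced by the fic.failure test above.
    try:
        adapter = adapters.getAdapter(config, url)
        story = adapter.getStoryMetadataOnly()
    except Exception, e:
        download.failure = unicode(e)
        download.put()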
    + + +
    + + {% if is_login %} + +

    Login and Password

    +
+ {{ site }} requires a Login/Password for this story. + Enter your {{ site }} Login/Password to download it. +
    +
    +
    Login
    +
    +
    + +
    +
    Password
    +
    +
    + + {% else %} + + + +
    +
    Are you an Adult?
    +
    + + {% endif %} + +
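Which branch of the conditional above gets rendered is decided by the exception type the adapter raised. A condensed sketch of that decision, rearranged from FanfictionDownloader.post() in main.py below; is_login is exactly the flag this template tests:

    # FailedToLogin -> render the Login/Password branch above;
    # AdultCheckRequired -> render the "Are you an Adult?" branch.
    try:
        story = adapter.getStoryMetadataOnly()
    except (exceptions.FailedToLogin, exceptions.AdultCheckRequired), e:
        template_values = dict(nickname=user.nickname(), url=url,
                               format=format, site=adapter.getSiteDomain(),
                               fic=download,
                               is_login=isinstance(e, exceptions.FailedToLogin))
        path = os.path.join(os.path.dirname(__file__), 'login.html')
        self.response.out.write(template.render(path, template_values))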
    + +
    + +
    +
    + +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Fanficdownloader team +
    + +
    + + +
    +
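Both this template and recent.html further down render DownloadMeta records, and main.py stores the generated story bytes in DownloadData chunks, but ffstorage.py itself is not part of this patch. A hypothetical sketch of the models it presumably defines, inferred only from how main.py and index.yaml use them; the field types are guesses:

    # Hypothetical reconstruction of ffstorage.py -- NOT in this patch.
    # Inferred from main.py usage (download.data_chunks, fic.title, ...)
    # and the composite indexes declared in index.yaml above.
    from google.appengine.ext import db

    class DownloadMeta(db.Model):
        user = db.UserProperty()
        url = db.StringProperty()
        name = db.StringProperty()       # output file name
        title = db.StringProperty()
        author = db.StringProperty()
        format = db.StringProperty()
        failure = db.TextProperty()      # shown by the fic.failure branch
        completed = db.BooleanProperty(default=False)
        date = db.DateTimeProperty()
        version = db.StringProperty()

    class DownloadData(db.Model):
        download = db.ReferenceProperty(DownloadMeta,
                                        collection_name='data_chunks')
        index = db.IntegerProperty()     # orders the <1MB blob chunks
        blob = db.BlobProperty()

    class UserConfig(db.Model):
        user = db.UserProperty()
        config = db.TextProperty()       # raw .ini text; main.py truncates
                                         # it to 10000 chars before saving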
    + + diff --git a/main.py b/main.py new file mode 100644 index 00000000..4240b4ac --- /dev/null +++ b/main.py @@ -0,0 +1,575 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logging.getLogger().setLevel(logging.DEBUG) + +import os +from os.path import dirname, basename, normpath +import re +import sys +import zlib +import urllib +import datetime + +import traceback +from StringIO import StringIO +import ConfigParser + +## Just to shut up the appengine warning about "You are using the +## default Django version (0.96). The default Django version will +## change in an App Engine release in the near future. Please call +## use_library() to explicitly select a Django version. For more +## information see +## http://code.google.com/appengine/docs/python/tools/libraries.html#Django" +## Note that if you are using the SDK App Engine Launcher and hit an SDK +## Console page first, you will get a django version mismatch error when you +## to go hit one of the application pages. Just change a file again, and +## make sure to hit an app page before the SDK page to clear it. +os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' +from google.appengine.dist import use_library +use_library('django', '1.2') + +from google.appengine.ext import db +from google.appengine.api import taskqueue +from google.appengine.api import users +from google.appengine.ext import webapp +from google.appengine.ext.webapp import template +from google.appengine.ext.webapp import util +from google.appengine.runtime import DeadlineExceededError + +from ffstorage import * + +from fanficdownloader import adapters, writers, exceptions + +class UserConfigServer(webapp.RequestHandler): + def getUserConfig(self,user): + config = ConfigParser.SafeConfigParser() + + logging.debug('reading defaults.ini config file') + config.read('defaults.ini') + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l and l[0].config: + uconfig=l[0] + #logging.debug('reading config from UserConfig(%s)'%uconfig.config) + config.readfp(StringIO(uconfig.config)) + + return config + +class MainHandler(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if user: + error = self.request.get('error') + template_values = {'nickname' : user.nickname(), 'authorized': True} + url = self.request.get('url') + template_values['url'] = url + + if error: + if error == 'login_required': + template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' 
+ elif error == 'bad_url': + template_values['error_message'] = 'Unsupported URL: ' + url + elif error == 'custom': + template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') + elif error == 'configsaved': + template_values['error_message'] = 'Configuration Saved' + elif error == 'recentcleared': + template_values['error_message'] = 'Your Recent Downloads List has been Cleared' + + filename = self.request.get('file') + if len(filename) > 1: + template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) + + self.response.headers['Content-Type'] = 'text/html' + path = os.path.join(os.path.dirname(__file__), 'index.html') + + self.response.out.write(template.render(path, template_values)) + else: + logging.debug(users.create_login_url('/')) + url = users.create_login_url(self.request.uri) + template_values = {'login_url' : url, 'authorized': False} + path = os.path.join(os.path.dirname(__file__), 'index.html') + self.response.out.write(template.render(path, template_values)) + + +class EditConfigServer(UserConfigServer): + def get(self): + self.post() + + def post(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + template_values = {'nickname' : user.nickname(), 'authorized': True} + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l: + uconfig=l[0] + else: + uconfig=None + + if self.request.get('update'): + if uconfig is None: + uconfig = UserConfig() + uconfig.user = user + uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case. + uconfig.put() + try: + config = self.getUserConfig(user) + self.redirect("/?error=configsaved") + except Exception, e: + logging.info("Saved Config Failed:%s"%e) + self.redirect("/?error=custom&errtext=%s"%urlEscape(str(e))) + else: # not update, assume display for edit + if uconfig is not None and uconfig.config: + config = uconfig.config + else: + configfile = open("example.ini","rb") + config = configfile.read() + configfile.close() + template_values['config'] = config + + configfile = open("defaults.ini","rb") + config = configfile.read() + configfile.close() + template_values['defaultsini'] = config + + path = os.path.join(os.path.dirname(__file__), 'editconfig.html') + self.response.headers['Content-Type'] = 'text/html' + self.response.out.write(template.render(path, template_values)) + + +class FileServer(webapp.RequestHandler): + + def get(self): + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + return + + try: + download = getDownloadMeta(id=fileId) + + name = download.name.encode('utf-8') + + logging.info("Serving file: %s" % name) + + if name.endswith('.epub'): + self.response.headers['Content-Type'] = 'application/epub+zip' + elif name.endswith('.html'): + self.response.headers['Content-Type'] = 'text/html' + elif name.endswith('.txt'): + self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.mobi'): + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name + + data = DownloadData.all().filter("download =", download).order("index") + # epubs are all already compressed. 
+ # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if download.format != 'epub': + def dc(data): + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compessing. + except zlib.error: + return data + else: + def dc(data): + return data + + for datum in data: + self.response.out.write(dc(datum.blob)) + + except Exception, e: + fic = DownloadMeta() + fic.failure = unicode(e) + + template_values = dict(fic = fic, + #nickname = user.nickname(), + #escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class FileStatusServer(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + escaped_url=False + + try: + download = getDownloadMeta(id=fileId) + + if download: + logging.info("Status url: %s" % download.url) + if download.completed and download.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format) + else: + download = DownloadMeta() + download.failure = "Download not found" + + except Exception, e: + download = DownloadMeta() + download.failure = unicode(e) + + template_values = dict(fic = download, + nickname = user.nickname(), + escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class ClearRecentServer(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + logging.info("Clearing Recent List for user: "+user.nickname()) + q = DownloadMeta.all() + q.filter('user =', user) + num=0 + while( True ): + results = q.fetch(100) + if results: + for d in results: + d.delete() + for c in d.data_chunks: + c.delete() + num = num + 1 + logging.debug('Delete '+d.url) + else: + break + logging.info('Deleted %d instances download.' 
% num) + self.redirect("/?error=recentcleared") + +class RecentFilesServer(webapp.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = DownloadMeta.all() + q.filter('user =', user).order('-date') + fics = q.fetch(100) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) + + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + +class FanfictionDownloader(UserConfigServer): + def get(self): + self.post() + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + format = self.request.get('format') + url = self.request.get('url') + + if not url or url.strip() == "": + self.redirect('/') + return + + logging.info("Queuing Download: %s" % url) + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') == "on" + + # use existing record if available. Fetched/Created before + # the adapter can normalize the URL in case we need to record + # an exception. + download = getDownloadMeta(url=url,user=user,format=format,new=True) + + adapter = None + try: + try: + config = self.getUserConfig(user) + except Exception, e: + self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e))) + return + + adapter = adapters.getAdapter(config,url) + logging.info('Created an adaper: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + ## This scrapes the metadata, which will be + ## duplicated in the queue task, but it + ## detects bad URLs, bad login, bad story, etc + ## without waiting for the queue. So I think + ## it's worth the double up. Could maybe save + ## it all in the download object someday. + story = adapter.getStoryMetadataOnly() + + ## Fetch again using normalized story URL. The one + ## fetched/created above, if different, will not be saved. + download = getDownloadMeta(url=story.getMetadata('storyUrl'), + user=user,format=format,new=True) + + download.title = story.getMetadata('title') + download.author = story.getMetadata('author') + download.url = story.getMetadata('storyUrl') + download.put() + + taskqueue.add(url='/fdowntask', + queue_name="download", + params={'id':str(download.key()), + 'format':format, + 'url':download.url, + 'login':login, + 'password':password, + 'user':user.email(), + 'is_adult':is_adult}) + + logging.info("enqueued download key: " + str(download.key())) + + except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: + download.failure = unicode(e) + download.put() + logging.info(unicode(e)) + is_login= ( isinstance(e, exceptions.FailedToLogin) ) + template_values = dict(nickname = user.nickname(), + url = url, + format = format, + site = adapter.getSiteDomain(), + fic = download, + is_login=is_login, + ) + # thewriterscoffeeshop.com can do adult check *and* user required. 
+ if isinstance(e,exceptions.AdultCheckRequired): + template_values['login']=login + template_values['password']=password + + path = os.path.join(os.path.dirname(__file__), 'login.html') + self.response.out.write(template.render(path, template_values)) + return + except (exceptions.InvalidStoryURL,exceptions.UnknownSite,exceptions.StoryDoesNotExist), e: + logging.warn(unicode(e)) + download.failure = unicode(e) + download.put() + except Exception, e: + logging.error("Failure Queuing Download: url:%s" % url) + logging.exception(e) + download.failure = unicode(e) + download.put() + + self.redirect('/status?id='+str(download.key())) + + return + + +class FanfictionDownloaderTask(UserConfigServer): + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + fileId = self.request.get('id') + # User object can't pass, just email address + user = users.User(self.request.get('user')) + format = self.request.get('format') + url = self.request.get('url') + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') + + logging.info("Downloading: " + url + " for user: "+user.nickname()) + logging.info("ID: " + fileId) + + adapter = None + writerClass = None + + # use existing record if available. + # fileId should have record from /fdown. + download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True) + for c in download.data_chunks: + c.delete() + download.put() + + logging.info('Creating adapter...') + + try: + config = self.getUserConfig(user) + adapter = adapters.getAdapter(config,url) + + logging.info('Created an adapter: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + # adapter.getStory() is what does all the heavy lifting. + # adapter.getStoryMetadataOnly() only fetches enough to + # get metadata. writer.writeStory() will call + # adapter.getStory(), too. + writer = writers.getWriter(format,config,adapter) + download.name = writer.getOutputFileName() + #logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.url = adapter.getStory().getMetadata('storyUrl') + download.put() + + outbuffer = StringIO() + writer.writeStory(outbuffer) + data = outbuffer.getvalue() + outbuffer.close() + del outbuffer + #del writer.adapter + #del writer.story + del writer + #del adapter.story + del adapter + + # epubs are all already compressed. Each chunk is + # compressed individually to avoid having to hold the + # whole in memory just for the compress/uncompress. + if format != 'epub': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data + + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=c(data[:1000000])).put() + index += 1 + data = data[1000000:] + download.completed=True + download.put() + + logging.info("Download finished OK") + del data + + except Exception, e: + logging.exception(e) + download.failure = unicode(e) + download.put() + return + + return + +def getDownloadMeta(id=None,url=None,user=None,format=None,new=False): + ## try to get download rec from passed id first. 
then fall back + ## to user/url/format + download = None + if id: + try: + download = db.get(db.Key(id)) + logging.info("DownloadMeta found by ID:"+id) + except: + pass + + if not download and url and user and format: + try: + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is not None and len(q) > 0 ): + logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format)) + download = q[0] + except: + pass + + if new: + # NOT clearing existing chunks here, because this record may + # never be saved. + if not download: + logging.debug("New DownloadMeta") + download = DownloadMeta() + + download.completed=False + download.failure=None + download.date=datetime.datetime.now() + + download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID']) + if user: + download.user = user + if url: + download.url = url + if format: + download.format = format + + return download + +def toPercentDecimal(match): + "Return the %decimal number for the character for url escaping" + s = match.group(1) + return "%%%02x" % ord(s) + +def urlEscape(data): + "Escape text, including unicode, for use in URLs" + p = re.compile(r'([^\w])') + return p.sub(toPercentDecimal, data.encode("utf-8")) + +def main(): + application = webapp.WSGIApplication([('/', MainHandler), + ('/fdowntask', FanfictionDownloaderTask), + ('/fdown', FanfictionDownloader), + (r'/file.*', FileServer), + ('/status', FileStatusServer), + ('/recent', RecentFilesServer), + ('/editconfig', EditConfigServer), + ('/clearrecent', ClearRecentServer), + ], + debug=False) + util.run_wsgi_app(application) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.DEBUG) + main() diff --git a/queue.yaml b/queue.yaml new file mode 100644 index 00000000..77c4e83b --- /dev/null +++ b/queue.yaml @@ -0,0 +1,7 @@ +queue: +- name: default + rate: 1/s +- name: download + rate: 10/s + retry_parameters: + task_retry_limit: 2 diff --git a/readme.txt b/readme.txt new file mode 100644 index 00000000..a6e59751 --- /dev/null +++ b/readme.txt @@ -0,0 +1,14 @@ +To use, do: + +python downloader.py [-f (epub|html|txt)] + +Default format is epub. + +Eg: + +python downloader.py http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo + +Do 'python downloader.py -h' for more options. + +This tool uses Python 2.5.2, but should work with newer versions of Python. + diff --git a/recent.html b/recent.html new file mode 100644 index 00000000..4311b801 --- /dev/null +++ b/recent.html @@ -0,0 +1,92 @@ + + + + + Fanfiction Downloader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) + + + + +
    +

    + FanFiction Downloader +

    + + + + + {{yourfile}} + + +
    +
    +

    Hi, {{ nickname }}! These are the fanfics you've recently requested.

    +

    Clear your Recent Downloads List

    +

    Please Help Test New Version

    +

+ We have a new, more efficient version of the system up + for testing. Please try the + Testing + Version here. +

    +
    +
    + +
    + {% for fic in fics %} +

    + {% if fic.completed %} + Download {{ fic.title }} + by {{ fic.author }} ({{ fic.format }}) + {% endif %} + {% if not fic.completed and not fic.failure %} + Processing {{ fic.title }} + by {{ fic.author }} ({{ fic.format }}) + {% endif %} + {% if fic.failure %} + {{ fic.failure }} + {% endif %} + Source + {% if fic.completed and fic.escaped_url %} + Convert + {% endif %} +

    + {% endfor %} +
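The fics list that feeds the loop above comes from RecentFilesServer in main.py, and its query shape is exactly why index.yaml at the top of this patch declares a composite DownloadMeta index: the datastore cannot serve an equality filter plus a descending sort on another property from the automatic single-property indexes alone.

    # The query RecentFilesServer runs (main.py):
    q = DownloadMeta.all()
    q.filter('user =', user).order('-date')   # equality + desc sort
    fics = q.fetch(100)
    # ...which requires the index.yaml entry:
    #   - kind: DownloadMeta
    #     properties:
    #     - name: user
    #     - name: date
    #       direction: desc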
    + + + + +
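The Convert links above pass fic.escaped_url as a query parameter. It is produced by the urlEscape helper defined at the bottom of main.py, which UTF-8-encodes the text and percent-escapes every non-word byte. A quick demonstration, using the two helpers exactly as main.py defines them:

    import re

    def toPercentDecimal(match):
        "Return the %decimal number for the character for url escaping"
        return "%%%02x" % ord(match.group(1))

    def urlEscape(data):
        "Escape text, including unicode, for use in URLs"
        p = re.compile(r'([^\w])')
        return p.sub(toPercentDecimal, data.encode("utf-8"))

    print urlEscape(u"/file/My Story.epub")
    # -> %2ffile%2fMy%20Story%2eepub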
    + + diff --git a/settings.py b/settings.py new file mode 100644 index 00000000..1e2a09d2 --- /dev/null +++ b/settings.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## Just to shut up the appengine warning about "You are using the +## default Django version (0.96). The default Django version will +## change in an App Engine release in the near future. Please call +## use_library() to explicitly select a Django version. For more +## information see +## http://code.google.com/appengine/docs/python/tools/libraries.html#Django" + +pass diff --git a/simplejson/__init__.py b/simplejson/__init__.py new file mode 100644 index 00000000..d5b4d399 --- /dev/null +++ b/simplejson/__init__.py @@ -0,0 +1,318 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. + +Encoding basic Python object hierarchies:: + + >>> import simplejson as json + >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print json.dumps("\"foo\bar") + "\"foo\bar" + >>> print json.dumps(u'\u1234') + "\u1234" + >>> print json.dumps('\\') + "\\" + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> json.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson as json + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson as json + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson as json + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj + True + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' + True + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> json.load(io)[0] == 'streaming API' + True + +Specializing JSON object decoding:: + + >>> import simplejson as json + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... 
object_hook=as_complex) + (1+2j) + >>> import decimal + >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + True + +Specializing JSON object encoding:: + + >>> import simplejson as json + >>> def encode_complex(obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... raise TypeError(repr(o) + " is not JSON serializable") + ... + >>> json.dumps(2 + 1j, default=encode_complex) + '[2.0, 1.0]' + >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) + '[2.0, 1.0]' + >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) + '[2.0, 1.0]' + + +Using simplejson.tool from the shell to validate and pretty-print:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) +""" +__version__ = '2.0.9' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +__author__ = 'Bob Ippolito ' + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is true then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. 
+ + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + default=default, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + """Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is false then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is false, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is false, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is false, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + ``default(obj)`` is a function that should return a serializable version + of obj or raise TypeError. The default simply raises TypeError. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + + """ + # cached encoder + if (not skipkeys and ensure_ascii and + check_circular and allow_nan and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. 
latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, + parse_float=parse_float, parse_int=parse_int, + parse_constant=parse_constant, **kw) + + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. 
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c new file mode 100644 index 00000000..23b5f4a6 --- /dev/null +++ b/simplejson/_speedups.c @@ -0,0 +1,2329 @@ +#include "Python.h" +#include "structmember.h" +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#ifndef Py_IS_FINITE +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *skipkeys; + int fast_encode; + int allow_nan; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {NULL} +}; + 
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+void init_speedups(void);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *obj);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+
+#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+#define MIN_EXPANSION 6
+#ifdef Py_UNICODE_WIDE
+#define MAX_EXPANSION (2 * MIN_EXPANSION)
+#else
+#define MAX_EXPANSION MIN_EXPANSION
+#endif
+
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    /* PyObject to Py_ssize_t converter for PyArg_ParseTuple's "O&" format;
+       per the converter protocol it returns 1 on success, 0 on error */
+    *size_ptr = PyInt_AsSsize_t(o);
+    if (*size_ptr == -1 && PyErr_Occurred())
+        return 0;
+    return 1;
+}
+
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
+{
+    /* Py_ssize_t to PyObject converter */
+    return PyInt_FromSsize_t(*size_ptr);
+}
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
+{
+    /* Escape unicode code point c to ASCII escape sequences
+    in char *output.
output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t max_output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + max_output_size = 2 + (input_chars * MAX_EXPANSION); + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + Py_ssize_t new_output_size = output_size * 2; + /* This is an upper bound */ + if (new_output_size > max_output_size) { + new_output_size = max_output_size; + } + /* Make sure that the output size changed before resizing */ + if (new_output_size != output_size) { + output_size = new_output_size; + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (!S_CHAR(c)) { + /* If we have to escape something, scan the string for unicode */ + Py_ssize_t j; + for (j = i; j < input_chars; j++) { + c = (Py_UNICODE)(unsigned char)input_str[j]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + 
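+                    /* Decoding the whole input as UTF-8 and delegating to
+                       ascii_escape_unicode() keeps this function simple:
+                       all-ASCII input never pays for a unicode object, and
+                       mixed input takes the slower path exactly once. */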
PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + } + break; + } + } + + if (i == input_chars) { + /* Input is already ASCII */ + output_size = 2 + input_chars; + } + else { + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + } + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + output[0] = '"'; + + /* We know that everything up to i is ASCII already */ + chars = i + 1; + memcpy(&output[1], input_str, i); + + for (; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use the Python function simplejson.decoder.errmsg to raise a nice + looking ValueError exception */ + static PyObject *errmsg_fn = NULL; + PyObject *pymsg; + if (errmsg_fn == NULL) { + PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); + if (decoder == NULL) + return; + errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + Py_DECREF(decoder); + if (errmsg_fn == NULL) + return; + } + pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (pymsg) { + PyErr_SetObject(PyExc_ValueError, pymsg); + Py_DECREF(pymsg); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char 
*encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case 
'3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + if (c > 0x7f) { + has_unicode = 1; + } + if (has_unicode) { + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyUnicode_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); + PyObject *chunks = PyList_New(0); + if (chunks == NULL) { + goto bail; + } + if (end < 0 || len <= end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + PyObject *chunk = NULL; + for (next = end; next < len; next++) { + c = buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + chunk = PyUnicode_FromUnicode(&buf[end], next - end); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg("Unterminated string starting at", pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg("Invalid \\escape", pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + Py_UNICODE digit = 
buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } +#ifdef Py_UNICODE_WIDE + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + Py_UNICODE c2 = 0; + if (end + 6 >= len) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + if (buf[next++] != '\\' || buf[next++] != 'u') { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + end += 6; + /* Decode 4 hex digits */ + for (; next < end; next++) { + c2 <<= 4; + Py_UNICODE digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + raise_errmsg("Unpaired high surrogate", pystr, end - 5); + goto bail; + } + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + else if ((c & 0xfc00) == 0xdc00) { + raise_errmsg("Unpaired low surrogate", pystr, end - 5); + goto bail; + } +#endif + } + chunk = PyUnicode_FromUnicode(&c, 1); + if (chunk == NULL) { + goto bail; + } + if (PyList_Append(chunks, chunk)) { + Py_DECREF(chunk); + goto bail; + } + Py_DECREF(chunk); + } + + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_DECREF(chunks); + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." 
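+"\n"
+"Example: scanstring('\"abc\" rest', 1) -> ('abc', 5); ASCII-only str input\n"
+"comes back as str, anything else as unicode."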
+); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } + else if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return 0; +} + +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
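+    In grammar terms this consumes
+        '{' ( string ':' value ( ',' string ':' value )* )? '}'
+    with JSON whitespace permitted between every token.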
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = PyString_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. 
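+    This is the Py_UNICODE twin of _parse_object_str above; apart from the
+    character type and the absence of an encoding step the logic is
+    identical.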
+ + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyDict_New(); + PyObject *key = NULL; + int strict = PyObject_IsTrue(s->strict); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + while (idx <= end_idx) { + /* read key */ + if (str[idx] != '"') { + raise_errmsg("Expecting property name", pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg("Expecting : delimiter", pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyDict_SetItem(rval, key, val) == -1) + goto bail; + + Py_CLEAR(key); + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(key); + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. 
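+    ("brace" here means the '[' and ']' brackets delimiting a JSON array.)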
+ + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + while (idx <= end_idx) { + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg("Expecting , delimiter", pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + raise_errmsg("Expecting object", pystr, end_idx); + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { + /* Read a JSON constant from PyString pystr. + constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. 
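+    With the default decoder configuration parse_constant is backed by the
+    NaN/PosInf/NegInf floats defined in simplejson/decoder.py; a custom
+    hook may instead raise ValueError, since these constants are
+    extensions to strict JSON.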
+ + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = PyString_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += PyString_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { + /* Read a JSON number from PyUnicode pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + idx++; + if (idx > end_idx) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. 
if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyUnicode_FromUnicode(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + rval = PyFloat_FromString(numstr, NULL); + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_str(pystr, idx + 1, + PyString_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + return _match_number_str(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. 
+ + Returns a new PyObject representation of the term. + */ + Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); + Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + if (idx >= length) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + return scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + case '{': + /* object */ + return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + case '[': + /* array */ + return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + return Py_None; + } + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + return Py_True; + } + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + return Py_False; + } + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + return _parse_constant(s, "NaN", idx, next_idx_ptr); + } + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + return _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + break; + } + /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ + return _match_number_unicode(s, pystr, idx, next_idx_ptr); +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } + else if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + /* PyString_AS_STRING is used on encoding */ + s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == Py_None) { + Py_DECREF(Py_None); + s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + } + else if (PyUnicode_Check(s->encoding)) { + PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); + Py_DECREF(s->encoding); + s->encoding = tmp; + } + if (s->encoding == NULL || !PyString_Check(s->encoding)) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 
0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->sort_keys = NULL; + s->skipkeys = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + + PyEncoderObject *s; + PyObject *allow_nan; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, + &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + return -1; + + Py_INCREF(s->markers); + Py_INCREF(s->defaultfn); + Py_INCREF(s->encoder); + Py_INCREF(s->indent); + Py_INCREF(s->key_separator); + Py_INCREF(s->item_separator); + Py_INCREF(s->sort_keys); + Py_INCREF(s->skipkeys); + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + PyObject *rval; + Py_ssize_t indent_level; + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + rval = PyList_New(0); + if (rval == NULL) + return NULL; + if (encoder_listencode_obj(s, rval, obj, indent_level)) { + Py_DECREF(rval); + return NULL; + } + return rval; +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = PyString_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = PyString_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = PyString_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON 
representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (i > 0) { + return PyString_FromString("Infinity"); + } + else if (i < 0) { + return PyString_FromString("-Infinity"); + } + else { + return PyString_FromString("NaN"); + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_list_append(PyObject *lst, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = PyList_Append(lst, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + PyObject *newobj; + int rv; + + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr == NULL) + return -1; + return _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded == NULL) + return -1; + return _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + return encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + return encoder_listencode_dict(s, rval, obj, indent_level); + } + else { + PyObject *ident = NULL; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + return -1; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + return -1; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + return -1; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + return -1; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + return -1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + return -1; + } + Py_XDECREF(ident); + } + return rv; + } +} + +static int +encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term, rval is a PyList */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *key, *value; + Py_ssize_t pos; + int skipkeys; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = PyString_InternFromString("{"); + close_dict = PyString_InternFromString("}"); + 
empty_dict = PyString_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return PyList_Append(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (PyList_Append(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + /* TODO: C speedup not implemented for sort_keys */ + + pos = 0; + skipkeys = PyObject_IsTrue(s->skipkeys); + idx = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + PyObject *encoded; + + if (PyString_Check(key) || PyUnicode_Check(key)) { + Py_INCREF(key); + kstr = key; + } + else if (PyFloat_Check(key)) { + kstr = encoder_encode_float(s, key); + if (kstr == NULL) + goto bail; + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); + if (kstr == NULL) + goto bail; + } + else if (key == Py_True || key == Py_False || key == Py_None) { + kstr = _encoded_const(key); + if (kstr == NULL) + goto bail; + } + else if (skipkeys) { + continue; + } + else { + /* TODO: include repr of key */ + PyErr_SetString(PyExc_ValueError, "keys must be a string"); + goto bail; + } + + if (idx) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyList_Append(rval, encoded)) { + Py_DECREF(encoded); + goto bail; + } + Py_DECREF(encoded); + if (PyList_Append(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + idx += 1; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term, rval is a PyList */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *s_fast = NULL; + Py_ssize_t num_items; + PyObject **seq_items; + Py_ssize_t i; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = PyString_InternFromString("["); + close_array = PyString_InternFromString("]"); + empty_array = PyString_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + if (s_fast == NULL) + return -1; + num_items = PySequence_Fast_GET_SIZE(s_fast); + if (num_items == 0) { + Py_DECREF(s_fast); + return PyList_Append(rval, empty_array); + } + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto 
bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + seq_items = PySequence_Fast_ITEMS(s_fast); + if (PyList_Append(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + for (i = 0; i < num_items; i++) { + PyObject *obj = seq_items[i]; + if (i) { + if (PyList_Append(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + } + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (' ' * (_indent * _current_indent_level)) + */ + } + if (PyList_Append(rval, close_array)) + goto bail; + Py_DECREF(s_fast); + return 0; + +bail: + Py_XDECREF(ident); + Py_DECREF(s_fast); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->sort_keys); + Py_VISIT(s->skipkeys); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->skipkeys); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyObject_HEAD_INIT(NULL) + 0, /* tp_internal */ + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, 
NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +void +init_speedups(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); +} diff --git a/simplejson/decoder.py b/simplejson/decoder.py new file mode 100644 index 00000000..b769ea48 --- /dev/null +++ b/simplejson/decoder.py @@ -0,0 +1,354 @@ +"""Implementation of JSONDecoder +""" +import re +import sys +import struct + +from simplejson.scanner import make_scanner +try: + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _speedups + lineno, colno = linecol(doc, pos) + if end is None: + #fmt = '{0}: line {1} column {2} (char {3})' + #return fmt.format(msg, lineno, colno, pos) + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
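+
+    For example, with ``end`` pointing just past the opening quote::
+
+        >>> py_scanstring('"foo"', 1)
+        (u'foo', 5)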
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not isinstance(content, unicode):
+                content = unicode(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at" % (terminator,)
+                #msg = "Invalid control character {0!r} at".format(terminator)
+                raise ValueError(errmsg(msg, s, end))
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\escape: " + repr(esc)
+                raise ValueError(errmsg(msg, s, end))
+            end += 1
+        else:
+            # Unicode escape sequence
+            esc = s[end + 1:end + 5]
+            next_end = end + 5
+            if len(esc) != 4:
+                msg = "Invalid \\uXXXX escape"
+                raise ValueError(errmsg(msg, s, end))
+            uni = int(esc, 16)
+            # Check for surrogate pair on UCS-4 systems
+            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                if not s[end + 5:end + 7] == '\\u':
+                    raise ValueError(errmsg(msg, s, end))
+                esc2 = s[end + 7:end + 11]
+                if len(esc2) != 4:
+                    raise ValueError(errmsg(msg, s, end))
+                uni2 = int(esc2, 16)
+                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                next_end += 6
+            char = unichr(uni)
+            end = next_end
+        # Append the unescaped character
+        _append(char)
+    return u''.join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    pairs = {}
+    # Use a slice to prevent IndexError from being raised; the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
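+        # Fast path: the colon usually follows the key immediately, so test
+        # for it directly and only run the whitespace regex on a miss.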
+        if s[end:end + 1] != ':':
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise ValueError(errmsg("Expecting : delimiter", s, end))
+
+        end += 1
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise ValueError(errmsg("Expecting object", s, end))
+        pairs[key] = value
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+
+        if nextchar == '}':
+            break
+        elif nextchar != ',':
+            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+
+        end += 1
+        if nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end - 1))
+
+    if object_hook is not None:
+        pairs = object_hook(pairs)
+    return pairs, end
+
+def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    values = []
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
+    if nextchar == ']':
+        return values, end + 1
+    _append = values.append
+    while True:
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise ValueError(errmsg("Expecting object", s, end))
+        _append(value)
+        nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
+        end += 1
+        if nextchar == ']':
+            break
+        elif nextchar != ',':
+            raise ValueError(errmsg("Expecting , delimiter", s, end))
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+    return values, end
+
+class JSONDecoder(object):
+    """Simple JSON decoder
+
+    Performs the following translations in decoding by default:
+
+    +---------------+-------------------+
+    | JSON          | Python            |
+    +===============+===================+
+    | object        | dict              |
+    +---------------+-------------------+
+    | array         | list              |
+    +---------------+-------------------+
+    | string        | unicode           |
+    +---------------+-------------------+
+    | number (int)  | int, long         |
+    +---------------+-------------------+
+    | number (real) | float             |
+    +---------------+-------------------+
+    | true          | True              |
+    +---------------+-------------------+
+    | false         | False             |
+    +---------------+-------------------+
+    | null          | None              |
+    +---------------+-------------------+
+
+    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+    their corresponding ``float`` values, which is outside the JSON spec.
+
+    """
+
+    def __init__(self, encoding=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, strict=True):
+        """``encoding`` determines the encoding used to interpret any ``str``
+        objects decoded by this instance (utf-8 by default).  It has no
+        effect when decoding ``unicode`` objects.
+
+        Note that currently only encodings that are a superset of ASCII work;
+        strings of other encodings should be passed in as ``unicode``.
+
+        ``object_hook``, if specified, will be called with the result
+        of every JSON object decoded and its return value will be used in
+        place of the given ``dict``.  This can be used to provide custom
+        deserializations (e.g. to support JSON-RPC class hinting).
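+
+        For instance, a toy hook (not part of the library) that replaces
+        each decoded object with its sorted key list::
+
+            >>> JSONDecoder(object_hook=lambda d: sorted(d)).decode('{"b": 1, "a": 2}')
+            [u'a', u'b']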
+ + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float or float + self.parse_int = parse_int or int + self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.strict = strict + self.parse_object = JSONObject + self.parse_array = JSONArray + self.parse_string = scanstring + self.scan_once = make_scanner(self) + + def decode(self, s, _w=WHITESPACE.match): + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, idx=0): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + try: + obj, end = self.scan_once(s, idx) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end diff --git a/simplejson/encoder.py b/simplejson/encoder.py new file mode 100644 index 00000000..cf582903 --- /dev/null +++ b/simplejson/encoder.py @@ -0,0 +1,440 @@ +"""Implementation of JSONEncoder +""" +import re + +try: + from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii +except ImportError: + c_encode_basestring_ascii = None +try: + from simplejson._speedups import make_encoder as c_make_encoder +except ImportError: + c_make_encoder = None + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +# Assume this produces an infinity on all machines (probably not guaranteed) +INFINITY = float('1e66666') +FLOAT_REPR = repr + +def encode_basestring(s): + """Return a JSON representation of a Python string + + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + + +def py_encode_basestring_ascii(s): + """Return an ASCII-only JSON representation of a Python string + + """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + #return '\\u{0:04x}'.format(n) + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + 
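+                # s1/s2 form a UTF-16 surrogate pair: the top ten bits of
+                # the reduced code point go into s1, the bottom ten into s2.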
#return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
+
+class JSONEncoder(object):
+    """Extensible JSON encoder for Python data structures.
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible, otherwise it should call the superclass implementation
+    (to raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None.  If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped.  If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such.  This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level.  An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be an (item_separator, key_separator)
+        tuple.  The default is (', ', ': ').  To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized.  It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
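+
+        For example, compact separators strip the optional whitespace::
+
+            >>> JSONEncoder(separators=(',', ':')).encode({'a': [1, 2]})
+            '{"a":[1,2]}'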
+ + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError(repr(o) + " is not JSON serializable") + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. 
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) + else: + items = dct.iteritems() + for key, value in items: + if isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
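+            # Order matters in the chain below: ``True``/``False`` are
+            # matched before the int check because bool subclasses int.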
+ elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, (int, long)): + key = str(key) + elif _skipkeys: + continue + else: + raise TypeError("key " + repr(key) + " is not a string") + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, basestring): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] + + return _iterencode diff --git a/simplejson/scanner.py b/simplejson/scanner.py new file mode 100644 index 00000000..adbc6ec9 --- /dev/null +++ b/simplejson/scanner.py @@ -0,0 +1,65 @@ +"""JSON token scanner +""" +import re +try: + from simplejson._speedups import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None + +__all__ = ['make_scanner'] + +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) + +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook + + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration + + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 + + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() 
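+            # NUMBER_RE captures the integer part, fraction and exponent
+            # separately; a fraction or exponent forces float parsing.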
+ if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + + return _scan_once + +make_scanner = c_make_scanner or py_make_scanner diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py new file mode 100644 index 00000000..17c97963 --- /dev/null +++ b/simplejson/tests/__init__.py @@ -0,0 +1,23 @@ +import unittest +import doctest + +def additional_tests(): + import simplejson + import simplejson.encoder + import simplejson.decoder + suite = unittest.TestSuite() + for mod in (simplejson, simplejson.encoder, simplejson.decoder): + suite.addTest(doctest.DocTestSuite(mod)) + suite.addTest(doctest.DocFileSuite('../../index.rst')) + return suite + +def main(): + suite = additional_tests() + runner = unittest.TextTestRunner() + runner.run(suite) + +if __name__ == '__main__': + import os + import sys + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + main() diff --git a/simplejson/tests/test_check_circular.py b/simplejson/tests/test_check_circular.py new file mode 100644 index 00000000..af6463d6 --- /dev/null +++ b/simplejson/tests/test_check_circular.py @@ -0,0 +1,30 @@ +from unittest import TestCase +import simplejson as json + +def default_iterable(obj): + return list(obj) + +class TestCheckCircular(TestCase): + def test_circular_dict(self): + dct = {} + dct['a'] = dct + self.assertRaises(ValueError, json.dumps, dct) + + def test_circular_list(self): + lst = [] + lst.append(lst) + self.assertRaises(ValueError, json.dumps, lst) + + def test_circular_composite(self): + dct2 = {} + dct2['a'] = [] + dct2['a'].append(dct2) + self.assertRaises(ValueError, json.dumps, dct2) + + def test_circular_default(self): + json.dumps([set()], default=default_iterable) + self.assertRaises(TypeError, json.dumps, [set()]) + + def test_circular_off_default(self): + json.dumps([set()], default=default_iterable, check_circular=False) + self.assertRaises(TypeError, json.dumps, [set()], check_circular=False) diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py new file mode 100644 index 00000000..1cd701d4 --- /dev/null +++ b/simplejson/tests/test_decode.py @@ -0,0 +1,22 @@ +import decimal +from unittest import TestCase + +import simplejson as json + +class TestDecode(TestCase): + def test_decimal(self): + rval = json.loads('1.1', parse_float=decimal.Decimal) + self.assert_(isinstance(rval, decimal.Decimal)) + self.assertEquals(rval, decimal.Decimal('1.1')) + + def test_float(self): + rval = json.loads('1', parse_int=float) + self.assert_(isinstance(rval, float)) + self.assertEquals(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. 
+        rval = json.loads('{ "key" : "value" , "k":"v" }')
+        self.assertEquals(rval, {"key":"value", "k":"v"})
diff --git a/simplejson/tests/test_default.py b/simplejson/tests/test_default.py
new file mode 100644
index 00000000..139e42bf
--- /dev/null
+++ b/simplejson/tests/test_default.py
@@ -0,0 +1,9 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class TestDefault(TestCase):
+    def test_default(self):
+        self.assertEquals(
+            json.dumps(type, default=repr),
+            json.dumps(repr(type)))
diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py
new file mode 100644
index 00000000..4de37cf4
--- /dev/null
+++ b/simplejson/tests/test_dump.py
@@ -0,0 +1,21 @@
+from unittest import TestCase
+from cStringIO import StringIO
+
+import simplejson as json
+
+class TestDump(TestCase):
+    def test_dump(self):
+        sio = StringIO()
+        json.dump({}, sio)
+        self.assertEquals(sio.getvalue(), '{}')
+
+    def test_dumps(self):
+        self.assertEquals(json.dumps({}), '{}')
+
+    def test_encode_truefalse(self):
+        self.assertEquals(json.dumps(
+                {True: False, False: True}, sort_keys=True),
+                '{"false": true, "true": false}')
+        self.assertEquals(json.dumps(
+                {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
+                '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py
new file mode 100644
index 00000000..7128495f
--- /dev/null
+++ b/simplejson/tests/test_encode_basestring_ascii.py
@@ -0,0 +1,38 @@
+from unittest import TestCase
+
+import simplejson.encoder
+
+CASES = [
+    (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
+    (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+    (u'controls', '"controls"'),
+    (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
+    (u' s p a c e d ', '" s p a c e d "'),
+    (u'\U0001d120', '"\\ud834\\udd20"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
+    (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
+    (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
+]
+
+class TestEncodeBaseStringAscii(TestCase):
+    def test_py_encode_basestring_ascii(self):
+        self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii)
+
+    def test_c_encode_basestring_ascii(self):
+        if not simplejson.encoder.c_encode_basestring_ascii:
+            return
+        self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii)
+
+    def _test_encode_basestring_ascii(self, encode_basestring_ascii):
+        fname = encode_basestring_ascii.__name__
+        for input_string, expect in CASES:
+            result = encode_basestring_ascii(input_string)
+            self.assertEquals(result, expect,
+                '%r != %r for %s(%r)' % (result, expect, fname, input_string))
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
new file mode 100644
index 00000000..002eea08
--- /dev/null
+++ b/simplejson/tests/test_fail.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
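+
+# Each JSONDOCS entry below is a malformed document from the JSON_checker
+# suite; the test asserts that loads() rejects every one of them except the
+# two documented skips.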
+ +# Fri Dec 30 18:57:26 2005 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '["Illegal backslash escape: \\\'"]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(TestCase): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + json.loads(doc) + continue + try: + json.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py new file mode 100644 index 00000000..1a2b98a2 --- /dev/null +++ b/simplejson/tests/test_float.py @@ -0,0 +1,15 @@ +import math +from unittest import TestCase + +import simplejson as json + +class TestFloat(TestCase): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]: + self.assertEquals(float(json.dumps(num)), num) + self.assertEquals(json.loads(json.dumps(num)), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEquals(json.dumps(num), str(num)) + self.assertEquals(int(json.dumps(num)), num) diff --git a/simplejson/tests/test_indent.py b/simplejson/tests/test_indent.py new file mode 100644 index 00000000..66e19b9e --- /dev/null +++ b/simplejson/tests/test_indent.py @@ -0,0 +1,41 @@ +from unittest import TestCase + +import simplejson as json +import textwrap + +class 
TestIndent(TestCase):
+    def test_indent(self):
+        h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+             {'nifty': 87}, {'field': 'yes', 'morefield': False}]
+
+        expect = textwrap.dedent("""\
+        [
+          [
+            "blorpie"
+          ],
+          [
+            "whoops"
+          ],
+          [],
+          "d-shtaeou",
+          "d-nthiouh",
+          "i-vhbjkhnth",
+          {
+            "nifty": 87
+          },
+          {
+            "field": "yes",
+            "morefield": false
+          }
+        ]""")
+
+
+        d1 = json.dumps(h)
+        d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+        h1 = json.loads(d1)
+        h2 = json.loads(d2)
+
+        self.assertEquals(h1, h)
+        self.assertEquals(h2, h)
+        self.assertEquals(d2, expect)
diff --git a/simplejson/tests/test_pass1.py b/simplejson/tests/test_pass1.py
new file mode 100644
index 00000000..c3d6302d
--- /dev/null
+++ b/simplejson/tests/test_pass1.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "": 23456789012E666,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[ ],
+        "object":{ },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+class TestPass1(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
+        try:
+            json.dumps(res, allow_nan=False)
+        except ValueError:
+            pass
+        else:
+            self.fail("23456789012E666 should be out of range")
diff --git a/simplejson/tests/test_pass2.py b/simplejson/tests/test_pass2.py
new file mode 100644
index 00000000..de4ee00b
--- /dev/null
+++ b/simplejson/tests/test_pass2.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass2.json
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+class TestPass2(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_pass3.py b/simplejson/tests/test_pass3.py
new file mode 100644
index 00000000..f591aba9
--- /dev/null
+++ b/simplejson/tests/test_pass3.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass3.json
+JSON = r'''
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+    }
+}
+'''
+
+class TestPass3(TestCase):
+    def test_parse(self):
+        # test in/out equivalence and parsing
+        res = json.loads(JSON)
+        out = json.dumps(res)
+        self.assertEquals(res, json.loads(out))
diff --git a/simplejson/tests/test_recursion.py b/simplejson/tests/test_recursion.py
new file mode 100644
index 00000000..97422a66
--- /dev/null
+++ b/simplejson/tests/test_recursion.py
@@ -0,0 +1,67 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class JSONTestObject:
+    pass
+
+
+class RecursiveJSONEncoder(json.JSONEncoder):
+    recurse = False
+    def default(self, o):
+        if o is JSONTestObject:
+            if self.recurse:
+                return [JSONTestObject]
+            else:
+                return 'JSONTestObject'
+        return json.JSONEncoder.default(self, o)
+
+
+class TestRecursion(TestCase):
+    def test_listrecursion(self):
+        x = []
+        x.append(x)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on list recursion")
+        x = []
+        y = [x]
+        x.append(y)
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on alternating list recursion")
+        y = []
+        x = [y, y]
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_dictrecursion(self):
+        x = {}
+        x["test"] = x
+        try:
+            json.dumps(x)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on dict recursion")
+        x = {}
+        y = {"a": x, "b": x}
+        # ensure that the marker is cleared
+        json.dumps(x)
+
+    def test_defaultrecursion(self):
+        enc = RecursiveJSONEncoder()
+        self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
+        enc.recurse = True
+        try:
+            enc.encode(JSONTestObject)
+        except ValueError:
+            pass
+        else:
+            self.fail("didn't raise ValueError on default recursion")
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
new file mode 100644
index 00000000..b08dec71
--- /dev/null
+++ b/simplejson/tests/test_scanstring.py
@@ -0,0 +1,111 @@
+import sys
+import decimal
+from unittest import TestCase
+
+import simplejson as json
+import simplejson.decoder
+
+class TestScanString(TestCase):
+    def test_py_scanstring(self):
+        self._test_scanstring(simplejson.decoder.py_scanstring)
+
+    def test_c_scanstring(self):
+        if not simplejson.decoder.c_scanstring:
+            return
+        self._test_scanstring(simplejson.decoder.c_scanstring)
+
+    def _test_scanstring(self, scanstring):
+        self.assertEquals(
+            scanstring('"z\\ud834\\udd20x"', 1, None, True),
+            (u'z\U0001d120x', 16))
+
+        if sys.maxunicode == 65535:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 6))
+        else:
+            self.assertEquals(
+                scanstring(u'"z\U0001d120x"', 1, None, True),
+                (u'z\U0001d120x', 5))
+
+        self.assertEquals(
+            scanstring('"\\u007b"', 1, None, True),
+            (u'{', 8))
+
+        self.assertEquals(
+            scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
+            (u'A JSON payload should be an object or array, not a string.', 60))
+
+        self.assertEquals(
+            scanstring('["Unclosed array"', 2, None, True),
+            (u'Unclosed array', 17))
+
+        self.assertEquals(
+            scanstring('["extra comma",]', 2, None, True),
+            (u'extra comma', 14))
+
+        self.assertEquals(
+            scanstring('["double extra comma",,]', 2, None, True),
+            (u'double extra comma', 21))
+
+        self.assertEquals(
+            scanstring('["Comma after the close"],', 2, None, True),
+            (u'Comma after the close', 24))
+
+        self.assertEquals(
+            scanstring('["Extra close"]]', 2, None, True),
+            (u'Extra close', 14))
+
+        self.assertEquals(
+            scanstring('{"Extra comma": true,}', 2, None, True),
+            (u'Extra
comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_issue3623(self): + self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + json.encoder.encode_basestring_ascii, "xx\xff") diff --git a/simplejson/tests/test_separators.py b/simplejson/tests/test_separators.py new file mode 100644 index 00000000..8fa0dac6 --- /dev/null +++ b/simplejson/tests/test_separators.py @@ -0,0 +1,42 @@ +import textwrap +from unittest import TestCase + +import simplejson as json + + +class TestSeparators(TestCase): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = json.dumps(h) + d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = json.loads(d1) + h2 = json.loads(d2) + + self.assertEquals(h1, h) + self.assertEquals(h2, h) + self.assertEquals(d2, expect) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py new file mode 100644 index 00000000..6f4384a5 --- /dev/null +++ b/simplejson/tests/test_unicode.py @@ -0,0 +1,64 @@ +from unittest import TestCase + +import simplejson as json + +class TestUnicode(TestCase): + def test_encoding1(self): + encoder = json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEquals(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = json.dumps(u, encoding='utf-8') + js = json.dumps(s, encoding='utf-8') + self.assertEquals(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u) + self.assertEquals(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u]) + self.assertEquals(j, 
'["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps(u, ensure_ascii=False) + self.assertEquals(j, u'"%s"' % (u,)) + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = json.dumps([u], ensure_ascii=False) + self.assertEquals(j, u'["%s"]' % (u,)) + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') + self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEquals(json.loads('"' + u + '"'), u) + self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + s = '"\\u%04x"' % (i,) + self.assertEquals(json.loads(s), u) + + def test_default_encoding(self): + self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEquals(type(json.loads(u'""')), unicode) + self.assertEquals(type(json.loads(u'"a"')), unicode) + self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) \ No newline at end of file diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 00000000..90443317 --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,37 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +import sys +import simplejson + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + try: + obj = simplejson.load(infile) + except ValueError, e: + raise SystemExit(e) + simplejson.dump(obj, outfile, sort_keys=True, indent=4) + outfile.write('\n') + + +if __name__ == '__main__': + main() diff --git a/static/ajax-loader.gif b/static/ajax-loader.gif new file mode 100644 index 0000000000000000000000000000000000000000..f16ebf7cbd4f28620c0daba2f4a36ae0196b3d4c GIT binary patch literal 10819 zcmb`NXHZjX->;L91QJksO9Fy4X^NnNiVC_BuprxlbVKhX^w84?z4u7CGf5evn_!Tr3?d(NECJ2Pu0AJ)uTvu54bx_-a^t*&`j>8i;TfD`Z)060EAmb_Eg z)wf#RL@#+W)ka%x?pW*}*_fIC+&j2FF}pLjxVO0SWp(}A+xH6{y(=4A-?w-72Szu) zd^`O1{b+b%YyaTK$DxhUsqNjpgDLv%hfiy@)VYJh9~E^o`Bf9b$IM!4PoLaT)mD=~ zFUJ4`006?j5qF#|Ok6F@g*p)Y6Z7*nj+PjJ@F5rmKRHY0xC_AA8$W@FCrO7u?zADA-VZ`x?tC|{EeZ$+PS}L}+>0Lu3YulS{ zT4!^6L+^W9RZs87mhra9j_Iz!%2vK2CI%TW6b5HzImN~dVJ!O2!OQxY?dQ)g-}ZOc zHbx@}1$YJ+?Kz}{t&;P~OkgsY^BS?Vr*pM7Zz+VGy zGfn8J#m3_yoX+b#a zDpjI8oCMB@qx33`Xw_VU0cm=BragGsS+$bevLq%hnO&KCcBH=3mDfvuPp-Rdj{6&R zHa8Kz$xJs|?B}2IQQUsp?)jr!!0=#iu`;qPU^}gDYqibg#q;S@SNLF|EZL!#x_2db)8GULs#w(5+~toZv=1YdVas3W&uFU4AzBgx@vEBz#;l z-pu9tZ)?LyrFGoG)7w(&W-(&|Hk`4;RUNspd=I*5q2ess%;1CmgIS<4v~VcVWaPQH zANrnVeu}B%aK0|an0w{9^-)|O9DS)D9?uwbcai9O^ScC@pw(ezOGyY?wF-n!Sm#Ke zZy3)y(6~018z!}0`0yn?_&rY+<>YT(f~^#{5m)wlacRx^dP!x6)JAFbi0ww11o>n& z{9EsPXXod0%vV-{ohI1IX%fgQW4U^o;-lR1D2o$i=iuqZ>VxzPB9Jhr87P6lwqYLL zzAmxh0lsOW7;-|8F98?s9psNE+IvPo(qi06Ws!-wc9~e{KMO$+^=Bn?AdpZ9E6B?F z1A=K_u-m%NuW@h!26eF-W1SeBa_##tKQP?qva-5}9POT49d`BGnqS=b_~meVq;S`! 
zgb_=kIvvv?GI&6n9GF3ujY-D@NP@D-pFf91Cy^u1oI&?!IJb}(xK)nDnXbl&f}s>; z;#;8si@QnEHk&J3gSNcYVjbRBWRIc( zvKzVb5N~!MFj(-4jL_9=*-X>+JQKFctTLloj)kA> z9G&XPDujY_f!g_$c1eN~62X^~Q5s<^MdWux&z|6ZtNq_x%Nv)pIs% z<@aqc7|CWG#B)lO27spFDxV{ zdYr!DyZ`O|uSUbIfsI>M$}+#ezuJX`W_;eY)V^^4y_63+2J*_W0U% zepJ^$z`V=Il-pRMH-O)ERX<>)`;978*j-52{u7^zX^6D`^EA)}O5Ge(0N%+LbVWZ3 z7{O(%Xc&Ih8OP#|MC*!1{kR+_8YV)k6N#2`rwV(Ym*O{JWIvY#0x~KwnK+*jILrpH zgsEGa3u@KiQ+VRWO+tOg#nRJmpN$nKR4-XirTUoGoH4@A*S|=2kmW7Na^0?<&XIR7 zOh~8kdrdpif%1ohd7+!_eVX}T-u z5(Z#qmf?p!n&=XXnBierie$(vu4-qu#{&x`ScFc29Zax*vO;;4wG^3Ly>pi2dQBUe zbn0P&ztrG*zPEG!kCk45uPjJ2c}{EBa&mFKYwyEbh)Y9wN5z&wFSW8F5PKN_dY3b9 z;`7C$R?X?Lu(C_}IaUiLI}_}`M=#WHpaA=gENGdp>e$%g{UTdV>#4Ev@4p1+=Eh|Y zro1mdCCr%)mL1?cSM=v5Lq9Hsj%wJ?9j;^ro=bC<80f*9MQpkSM;v zL4@!y?`T^T#xa7`-zO=YObN=sCp&rsK!P*uQ*+WYZ3q>q-g%jJaSkZK)98~=ryUBy z2k%0_Pmlq!a)q!m4GqDWdi$e#2Zx7ykyeufGs7^bb-%U6(h%L%cXMWRWg-Z<_=RaD z#O82mXO+I86OmbWX%C*m%p0DiNZOz0kJ1LV0C>PqrE_SsDg%>{>IpYgh~2Rqg2z5A zl6$I}Fc>=JX*Y)`iIH17<*mx=<%c2iX8DHQdzM=1%E1)FU8;FN(#6;jGWN^Mz6`Zg z&<)Aqf{`qahF9kBr`Ecq!X-Ie#WAW3V6YsIbQW-t+~{$LdQAb4@DSW*OFC&20rFy& z2lu6B?cSk=GAiD6KiHjZ_Y_mQlBneTpfipWpo2P4u;}B|Sh(zvl(O1bEWwtawu#vs zDoxa*pm3GTgXO|jcs36RP#Groh?);nb#dXA(O`Z1h~(A`%ekL<(?k`t@zKbxR*vAa ze{vyYUvs+mw~hW<#_t);DWKTB-sc_>zRKTrezuR!80~y^%k=lqqYE7S#8-7r8!^k* zu@c*hu!cZ3ZM2|$xRF&72sAaU>RIVf`2g2N9%~6ACNs@m=ChA#32CY?oRJsOQZWma zc%1tn;!=!N3wm^FY!rGnMkz1+0{;Ffh|Fz;WC!gr9iD1 zMbiLDHYx$kZmpCK5pN38^Ko=!}~{zBHD+P|(rL2v)lL1JiZcvF1V8IYn36ul$2* zPXmQA!6NM!`ns)A&3bQWMdGq~84qeH)gynuzVDvLTL$P*=d`W^bhACum14kqbeDh~ zp_|ifzbq4R#1?r$S+!ur`v(`>1hn)7e!bfAk@|=bIq{*e2U%I=tP_;ClNTT5apGYplxKjaubZEPcYqHo($AKNbHXsNM!Sbn zpuPlK5683&f*%7Glan49m06IMkdz#S^YTh8!&k>)Q5LLLGDaR42)y$IEGJ~K4n6_P zAQODB*FVbU)6jIUwO{nq^e7bh(SKoaEM{_GXoP8VZhQJOYKy0duT_T$K{jZrJ6 zfw0{Fa!$S6Q1uq;)Ke8XJcRIsl`4m)RxJ=gjG{p(&pB06gdp>A&;ucY1u;hA@7ub& z=)8wX@>Vws`(G7+)8uU?3}IYiq!ez$5SjI=<^Du*7r}U{i|A0&xq$EHhYH0#xytqD zzomRr?TY89xoC~YI5bFd*^bjJG7If(LTG|fBTfvJ%7Y=zzH|H1k()r@ou!h> z^c2FBs_5;*M>-Ul@f1okzm#+hBjTc!zu%o>fp8XrgZ zb5XdTUO|K~2l0yP-o+uH3!TxU&-#3XzQP71v?a&Cg6fBP7-@|Vyo7}s6H2Sq;U2fEjE zkDvm0ep{%LSKY)_pu9H|@MIe5iAgYjPT~W35#u0~vye{i^DsWa#1<&+X<|=^;C5lQ zvC7R3vpc9)xY6LLX${jrkI!8K%G_r?vWb#kb?{IwdLZh)@Jt)k!<`T;XvQ36!|x(V zi?#Ip6kOet_Q75*--oAbZC)NW6`Yh@Sd?4ao}73o;S*{qpj zdE1rZ@Zr1()!az|g|P~%@v5CXO^{^wIxodG^0?|%->BW?r@++ABGReRtzteY-tnA{ zqd+k@7@MV1m^)G@97TuKCtcQDW)hApI2*O&|3fM@EF2k?v=R7oyFjHyjE2ZICx8BC z?*5$vD%_%g2AuT}riLuc< z#TDg;X0vJ@E_Km@`d`2|0+puvSj8QxKo3Mhz5?&)bGr2ku-sVsc$fL%W%-zkrwSl< zHeNm$8jpY2H@p-tP+oyKvQkx(#ybswbIwySsZ2k~MU_I6Ni5HdOqI@bAp~`ma%jS* ztH6w0g7yo-tx|--@k4~tnBoOvqrN16SW zgW4D&i&oMCH@wLn4sL>_qU_#o5H|HKzTHwb0SPzB<=lHsSRr_%;GihDqkEyKoD` zNyxD^)TZOfkwRjILv>ZLWU$1oV|{xSc@Yvw_(_QQ_@3CkI0;TezVVX)6oq#;Ew$m0 z=wiI2DSrUtSfM4bC4hdtRR%JpNSTapCqPq(L~#UJK=Y{Zsyo(kuYdH+TQb>O(< zlqymlin{i7`BlJ$PUA-{ov{8{`3uIqUq8R&tlRl^UihlR5|EJ3kg^z+A^@#CYa! zQj!Ntzarvlw}!R_4x<+Q!1HOj#=QEugnA$Yn--geg{2oX17fHD`T@uwL=hO5vemwv zJg>L%GEK5UEeB^VZYHSm&{;AO{9#w|p_Nwa8%ufn0T?e{l0KsD{m8u^$PoMRFuGvg z6TWU7NL!IC%8XH$4Ek7h#N%{^M-Nyur|B`4J52DfD&f4QR9u=b&sS>JR<6HkIi!Kz zi2hR4$qW71Y*l)k+c|f%QPjA1?@azVYkdItQJ>N@S!yDA0?M~)vk=tW7X*wJ+SWaP zFHTE(KDs-)c(nXhvs!U|P<_IALv`qAO7q3$wW8oMO_*o|du}T5T|YvpA{SWFxZTmY z?Jdd7pn=ANeD{7x5FPYjGnwIU(x!T_g81S%{qLIA8R*Bsg3v1$(!^3Qt&p!?N{gm}IRq=E{aKwWGg%&?lZQ5u8=5M0ZLcR} zml9LnJ$$VI$(s4)xf(Xi(pkxBJ^acK&NECv1`US2+@K z!jbFm1H!$Ugs;zF`-Gab#;#&WcaE2pH}SUSP_j411qlldfsuT?ph3a0;kYPrG^<-! 
zLVyoLN@{$P&*?08+pvf{EB~-!R7O$^-aUfi?Gj?Ij{6TvTu7|te^AoX!-VXP3x?2` zMn?OyLt)*cGvh_E(=#Leb45doOP@k*yW#8ojc!A`i&NQ!$hFVV)IruEVXA~|s)~#K zW|#=0))OED2eeX%PM9Du;|2fYD2#s`MN-vFJYgeoQSM1` z-njF^horN>V0+ImOX?}2_jzu>o{HU1d|g z@gsXTU!(Y5E4HPv-`7phyJOMpBzEqK!^h9ymf{MNLhbC$zx>ZKhm!3~hyhk0k1s}S zdg)~NTVGDrh^g{`rI4mx_sJi!U+o&iL^CVXD&MSOyl-77Z<%x&8f7-PerpP2JlH7v zbpJ+_vn!-^>vm?^^d=kx71ajXz_P@Xyh3Ef0mmd ze3o=dp2bKK#@)s`Qi16&r8%&GD^>`!)PkD}i6r12dWu}LI_n@2;uKNu^1b9`B*AwFL-4nvGGQJ0!6bA#Xq zK^qYUHL0p@cdyhD^vOm_6_EE6p)J0v2^*=DWBG(d11~rQlYT@~FKq~z>q?1KiGkWT z_EXg5!b&dpAX{3*HL7Z5ScC`w#)IyPI1W@)RuEZybk(ZiqV{qoCmM@y4=Z|*?jw6& zXAEEdkgeK=haXram+vgt!jDlU<%oXf= zjd?VxL3|vAs)Qk0@Or6x9Y<{?h(R0+fNcFrwGQF#ku92mTo50adK*xj5Z3TZKq)EY zK2^yuC{jv>?nzt0%}0NftyG?2zoUE5>$rA9$|puKgZgf<2YLVov3$A0VX#>KWBVVD zJazs1{h##nha(mLO+T4zpnt7S2>`XUNnYY0<9(Xq9~MKjgeF7<$9SOvtb;r}VNMCor~N%bb0QN13qnG$C3!*FF;$er z0!O5}1rS1^rm}!Y!W|U4;qUYmtTUqT6 zaah`#bPO5(0u3$PU+F*kzTB*uDjGgpi`^Le=7KK317RoatDME=Z9y39{{aeDGM(5F zAqYGU7qxK~xpx8-v|;e)#Sck*oTWWubg>?@YgQ?nhkGIq(`9`-SnfS;)y_O~Az~5s zY*|a0H7LhiTWEz=pva8sPY%St2SQ~`bwmG@C!F)Nbo@#xs201T)ulbUZP4W8meIzX zR8F>obJYG~=6hrcL2z)6d{AtWhcJmMjBF?wEcQAd3f>LV16tP;71XtSyej&J$4(Mf zc=l}w%dQ+f{( zPSwy0?^F8H6Ee5eE=*xxR~n_f6y_O+UKH%k4Md=|(QroNEIJV>*PmzDZDO4X)q89` z7T()cAci)&+rg5=q9S4FY$qS9l6uq*kx;lP4oHNwe@!5E)^H{d*TF05y$AXVu6<2 zf}g~IJ}?`oRcly2j+Zd2U^f=AFBT}BD6VQeo4OfXmcl4qO!Eq)9{ap6vQ0MX%&=-H z4f!Rp4r6o=XfXl-))vh!6eB01jKwV%8%NbB!eO-o8phH5EREBYE8D5L4aYTO>(rWZ zyUf6=6SR`3m(!zip_})DDzR;F@wq1O26Ed>D#ET6z`=x7f#8-5R-2PALXGls)fxf$ ztrlJY3WK=5OCCq5yO^KVi-&0Ge06oP}- zG~e`&j`7ueaPjq;HdRnRVF}+eEERY`{rQXbF2 zssTI7ci6=Nnu?bOc=Sj9@ z9s{y1Dpm0x((Ve_=f-!=Lc4}7CVV-ZzYCGeEe?cDdad!oRwY%8yc7Xfz$9tEJFo@H zpQV9NV3)$?;CPhy&_w^<;T+26GF>xUZjdG#4(56S4Biu?;zRB9RM;_aTcIVgtr34V z(V)-brMXy40u{8TCubSlYfl9vCQ6bPOs(Kn5^8(#tK#t=%k;u6Bb*8>vAEl7EDXai zb@mAVl!N>fQpD8)pTd`cR=Eh7U=YyFa6HLgmXgBb32c2vD4a4{IaT=a?@p?hqa-~; zU$$8)JxX8<&RSBvoRVk26(F@)l3yiJ0&b<;+l=hgk1DOmXlLYbYinJ9etfyN{+wai z2jA!6`4X2TxuEnPA}Oa78s3HJU(R(V$|Lh?CMGC zsG4<=l)!dze4Vz%ve;wm=p1anG36pPyq+Jz88yliqFE7XkTE6{`ucoKi3KitJigp= z=0@teVqPw2CIj%U`h+KSEC11wKRo&0>g0d5(EHD zUnYI16H34#y~)1*1R^UX%+Ji-BNRr7^&>=`Bp!yKM4S&984(3d4uDzN;qCH$66}Is zdd3DsRE0QsK^V;c!3l_i^>hm61Sm|1-d-d$yBgVj5_6bZQap#pMkcV;(=#n$7}&zt z(p=XGPCj`Jtc`9Au5Rxv?PY@w7eCz&$YG3}y-H*X;s3)Epb4CduP#fSs0NEgsEl5fJ6E7xf#wA@dUw=)&B*)yj_$#CwHVJ#y4QFl!8 z!IB(ny|Gtq^dG?8#5+Pb3Z28Go=Ma6U`$F}St`pA7CyGT@N@svB?=E-<lhO5|E@f1c-g)%4-6as`B$nM3B0$`o#9 zb+F`&Vnn)Q({ktR-3z}2cE4=5a%=Sa8F?)?z%P}0uuS{@+>6Mqh(LKZyVZ@gkMe@P zTvcqr#eDuV6Xvk41Va0!0#!i~O$h+I50y2lIInn~7v@OBx#xfV9KLJ?N^8= zO3Eb##*8pg@t!>BYJUImx^FMgr!0BQl9wOiqZP~tmeZUtt^EAPR+MB*a$2&@l#647 zm6etMdi^-w`K(8phTIhrK5D30>|ExHJ)F*LH{s)GCcloUGk?r`F~>0e}+8&W_xm?DZA%b zl_IKzw|2R%>>e!llzR&Xzx9i_a&XLmgCRZ1C|`d+0x5tL=w?qKhr=T6@x-`De=}%k z7#8N57?K14nQKXb#5 zPjnMOB?9Ci%zQ$BCdKWc9DVvbH8c5qNg%b1FcF z2of7wB}+xM|DUiU^q6qD7DVq$k5Uz#wB_SkoJNlB=r`Ika$cG0DML4TZ%}kAxBAwhl`K(e;;AnS&v+`X@ zdgZn62e02{+TFOabTrw%GV(`LfDG&^m4suntv6y+7hemV3=$h{lt+Q9Reh3pSoM+$ z<=5>HRuCi>3cp}B&0?`*)wj&H6uVuA_73;un>XgqI~bK0hX&;s^z8DPG75CXFZjgy zYUnbu6Yrwo(6iR;!8sSRP)<*XBUu0)4oi0L-$jEjBQtIE9)i4r6;&Da$_sm-X3#P5 zB!noH>N@nMJ`9R=ELe^kD|(H`;nsuk^6p2HiE1ehl+`TU`9zD$K^>CMlM%|nL;^EY zLb46)sfwtNfZT4*<+Fzxc}0EE`nIluet|QEpQo32aJl7hyYSi$HEGv4UkPAzeKXC@ zfQQ{6ue>CHr})+TS2P_`>$wf-10^G#G<$^B*#~F5=j1-v#5l53iNE7nvvm)zSL3YcQq`!7K5@OdZe^d>PEMBXgo8Sj2hPG z6GL8=rq~emG*JoanRhRBKVUEv8@f^wU_LL_7_0j literal 0 HcmV?d00001 diff --git a/static/favicon.ico b/static/favicon.ico new file mode 100644 index 
0000000000000000000000000000000000000000..ad4ca66a17637746a5c33e5a1cfc46e35754fac8 GIT binary patch literal 21792 zcmeHv2UwKH^Z&l}`q2ag8^(eim7pLN>;*+)iBgRs8VhO=>2e36#%@FrrA3LTNz^C? zMVis5!2-qxh=LspNE9QSl>5!zduQcPN%`ja|Jlb6yKmXq*?DJoXJ_X`h(HX&i9lw^ zR0pBYq1?SYcl~91gciU(Gc)eGgC0V8RtPn1%3U88z{|rC($!VJPa|~L5up{14#Oov zQz1N+j2tm!9Da(yXCZU--ZcXu>CKTt22SuNN@KlNyG}GYm1E;K!?)^yTPKTIXI70F z^-UgWI-;%qFFH>@xo^Dm*I#wEMnyGESP^15Y*rhCJl^uU$&5n}va?G{O1zYr(Iwf}wglz$%P9+ZT$XV*-Ln6V zJ59|HF@_vyId}UqME4|UyB2nKQHa!a=)Y|+A{;4Q-S%=69x?X5t{u9B=x~7k&z{q>evb`!UXIQN-_%iw8 z9Odkvo%bbmujyp)N5?-motgMJGPpe8uIrlgq?i(GbmX$4m$%Cr;^1(DIYSWfqh93> zqtux>eqIK)hc2~UoL(I5u>0_$!gZ52Jesv|?!6IL61Et08`Cmr$4uM1?6CAhi7{To z$0ZabPS1(!*NZoJ3*SuM)6*lvqOj_Rn1UxufAK3lYMI?4bN-=)MFE)$bDv!ZanBkn zJUXU2Cxt4ru6S6G-EXE>TF)Yf{xPl>oJ+i(4+^%v=q>%iIwr{ZbfToyk=emDEsE~X zO7tAv*G*O%y23!`n%|s7cMK*iz4ap7>4;Z)`N4o6;+1J*g7Z#1$n^+(eEE|9ow6Q< z%CaU?nlIY>lLsjk^!(9sx*cja`qJ66`?CyczbL%up8rK^aiQXQO8A5P6@wM@?IAt} z%Lo5n=v+W=vrd^8?;GHxo9ulm&3JtUJGVks<50Y>N^YN9`t^gv;U2fmb55R2PubJ= z`mAQR){N7$Lpw&D%F6bi)U5Nds$P2!&dJKx3o86-zRtES^Ib-YTy7Sm>`VQ0k#!1H zn&q(gme;MTeTuX5OKO(gNRK`caVnzQm}1{9kRY)=GYq(zV-~> z;YNFw#LSMLar>8<<~v(mO}bDyywm3EpLNVGxV$I!(6r#2p(n2$ij2xGSn2LkR$~(# zn|Wuu?%uR(%bOhvU+~L!vDLOyPu2QP`|5b<&{CecYt8+O`=2~&clFjDePFxYs9`^J zUgKZ6WA}@qj9k;QiXN3uO3&yW4PVGpese26wVQb>UY_HcdC#6YUi0_rlu`MOWAKct zzKYVL1K6^awksd5D+xYo$r~SdxqWPWf=x5!s1b8DQ(+s#U-h@Nlp{eZNf zy&-2ty~s{G-uk@NgX0fad#4d*C+?V@J3|;}6h4~ld?o8}+O4bMvF4&kS4X4J8pH9~ zEfiiJ&mS4*pNiZZerxO87Hj)wpW=c|lnx{TGfAL{*xYoo^Nz*?sO( zof*4#7g^oQn6=N!Gh&LR;5(O&UvctqaMt<$D&U};%DXk?A%%>v6eDEfZ4HEwNv`8}d1=5T>S`Pdn=PIm!R_ z$1VFqPWY7Uxos({nU(4GE`^mYqr6Xn&^j z!BU~`9wxe*j-qqs!{p?gH9?uB3y1z0CrU5cVnLiut@X{H_T&qX=X*W6wV3U2E_0m0 z{jJZ&Rm5dW!tFA`d(S@?G;8i=okQ0hCu9UJ>d=2yMf2VVhWr?HtTr!i&H$;mU(5KS z&i8jXCAI(?tNZjDo~|*A{VEyT;L5xQFCr(+Nd4hUpI@uiD3%XQW-@~|Tg)qXc-*Mq zLhln(%Z!JQ3oOn!d2Le3h0O8ugnir!(lU~1-J@5ZTAa9GA-=gXr|PQ3*xW72D5l`c z0g~jD88cCdbRCTMWQ$iF8XsI9QnhkH)o1H>=DmntJ6+*E9Dhj^v~g}(e`Vav zIltY0zOwW{LB#V%R>iqq<&O_snXsx=d49SgbGqO2(FHld!&9Hktcfa5$STZlR@^e+ zyB>bI_8n_m7u7}=Ou5G1zS~xgMn!FQKKJ=PLfoc}$L)m63$NNZOue0G;h|GHDR$dm+;%%GkOve&$!h`kTr16TKXlBO~Yjt>?@3Y@1be&q5IsyzkE! 
zPg{8{{N_;8#a>qIqIn*QbkEcCV9M(~C`~6cDM}uD$y1K#31CxSK5NjMUX93a#b#!s z5y3Z)Ki^u8b`gBOfgL)QW$wc_L$eWijMkMecq(W?^5v#-d3S;~iwPdI9ICO6)fz?e zAJmc?%J9^Atxc-V1jC*ejU4JQWbdFa!a6`1HrMD20N!zHOG>2@HKI`ljXKb%1OE#h zP$(4Wui%A3K>{cZfEK~w1*K4Y6i9d}zqTe3Q9czOlVrmE&@Kh-fR9uWK%oFA>UQfxr(&Fp|qd zNx3|X=J=Bnb@8cfUHgFhBz~tQLZJf@Iz9lQj6Mkc0=GILq(2NHm*WUIWpKGOlqVn4 zj!8!mnwWx+!)}CZ*MZ;K73i1(a3TPl>sxjC<{y@2QA5CESrXTqpwnbexM53Wm|-(z zph0V;xq!7{`9s|GB|DTllJAuIlATIjNg@E}^%{K!wUiX zO_hM%P^}#sl>#6@RX(|Za{>{062pt3H&YomZaf-6kn3y zD=|_olo-62CvK`RqrhuV+B%8ZNF|@$SSbX)SSBEcriKO3)aWHNHA+TP_MrgzD>$xX zDAzb1voM*@tiC5;p&#t}XHd5mml@hA8dQi6B7<{}zb9$k zhjaWgU&^Ds(gkcZ^kE&y0rZG+*~;~hd*|QdAIl#nN~6Efm->>r4}@tq;QdblyRk~h zZmr=nVRs1Bv?Is2HeIZb?yF}XCx7P{t|q0K$MM{S`=AEa0}WL7;1SnXmiG7o>p#q8 z&+#{)i6M(PS}Z=bY;gLE{A0egCG|7aIH5?Q<8U?qFc&A(GSswPoSwb>mfGmld;cHg zU)?U|A18q>pIP?|<{?-=wkUZ_@MVMy+i}lc8}l%rpaKJ+KLjvYu=ssE|9`;z^S1uf zCx4E$mw!o`r52u2t#7J6tg9f%_Jg^{G^`1KEe++1qyItvp`QfFQcls_T)Gx?C?3{= zo|8DPS)oP`j7ZwDFDqQpn-v7wutK>tD_jf+(SpVBj;uE;40IFep7-I_ud2D+`c>Eb z-%$Chz7wp|VFES|#_;MgA-l7dBKyu%^Dn|WfXm%k!Q3z%W91^Lp)yEnr1S&$zXt*E ze38TuYzLM7Q1kcm&y4|^8tN2&gA=0}tU8I+*4e&e< zaCx4%$1B_7&OZ)tW=YFsy zo@2X|B)d;g^Z#-U#(G2(gb*NjA1HW-0V9zl2-co2Nz}-{9{aNa@_)I0>G9dMU;}M} zy}+g#0lVr6Nw(?7@vpJtK3@Ksd3VGs`@cT>`6Kw(*u*OPfh5U}BRJWE9nFiag+0y* z2-Y&@I9Bx$S0L4(Ab&5Qu4=8uG+_Vo-G|GcR?XFTO?qXoY3#Pwb4;1D^%#1(%6=2D zYs!VZ1hAneZNg7MUH;PA3;l$D2z{XZKgNCrx_F;}h1$Xr@1X^-9|jv5_F)^IkVH2J z{7l0?2!g`8{4v1ege3?Gi~}C%|IYWz-!+btH;ebMU^-#hV*61ATn6C%Er!O%=IgNZ z{7h_na^p5m$4eLHoV>hh~w1fl;4 z-fQtyd#?zEkllo3|0hXw8o||h#XrbVSD&Q18hjkOG$E;OP_MHCU%E%bzxMeb%OCFz z1rTG2=WHxPo>&dI7U&R!PBW4)?M{=nU-D?5OqDN%-3WW%h)N+ZPDxV^Uvs>_w3$^M zZb!4@W{3^Wd43$u;yTmC2B#N@o0Q@8*GFPd{VxAlcALR}5|YPI6C?0Go}ng3!QOl; z_DQG#W%unY?Ue%i!yMpOHOC8h>tW6WpAg>n(r()k;zQpx`*zjXQ}N%G5qJbijcWp= z##I2ESAQr1;awl8;fwhaqZbP#1{Jf!O`m}O!4mvelf>Y&;DqByXj)&Z!g;Lf) zENRk0xwJ`3Wq6Zj%Ebm|%8o*|5B47#sQiZsNP_8 zaNdtXzV&35<*EFXEGwX){En{^dSe3)*T{wfoB@#?5ZVNYXp7JWpw|L|eg`5n_7LD8 zLXMDQsIIl-4#^1F??q@JjPovwVSTa(I-oDAgk`Q?H@G=leP5M@fd4#57Sdn@L)}-9 zAxuvuUnY{v)-`(z&EZ-L)|$#?EA+6>Tqg4Y_yIz&|C|Eqnzp z8s*Zc1An6qyuOw-mK!DTu@V5g3z-4z0EDUpg*sB>I#;K?^RLfkYJ2H@@hzH|8XOM; z@2cz|ZSgeq#?1Un-yPeE*lv8UJnM=fdHdd~&`{IJuq8cLhlV!C&qC!J+lI=wb`6zp z>GCF`I?3hh;hg}mxBbE1*4Wx$!>i+J-sY)}q4CZw2v(S5hfuaLLTW#;T|C61WC4DI z7?jsA=>$TflMu364SRXDPmqhN&=7sQ{kmc}-o7_PthXwpNps~$ss2X%Oeze!q$}C3 z+TY=rl2_OT_YMH|Dy5<&xdOiBSO`mbV~_WC`x?IL%lojxCEjojF@Pt6enHP@A2|Pt zsAj0oH&SGu>95dl7ERiR@tD91LSDR*&nzz{2!qzI`-K|PzAYHMK z!S|{pi1x6rd^eHAC$>EBQQ*Bj_!_=_{f)n~eUj)gk@JtK{AX&v8Fvm&5?viIkKl8t zB#_uh!^7X(zQt!6x^L#3Pl(4X&IMm1oNx4=qIs^ZeF8e74R8*)tS_p<7q~q2DB&clOi<@JYrtqR*8M9WWc`+R17WqtD3Jkyr9M-?E zy5tY*&!D>M5JYPmSOd>ve=_*N(_Vk)ZTZ8Yi*O)MyrDig3w3|{N@Dl|*56f_7LQ&8 z=cf_X5K9?Bk=9dR0k6MfmhzZ@^W6Ho`VW$5hJSW~zo{y{-M%i%Gg@ubN?9M4<9Jr^ zOWRBJw{Z4H=tW)ecBRhSP{p(ESKA-UuM7CLE-XvD&;Ag1LnyI6SRk~k2SPtc5mK*X zgA*VQIQv~-n`{jDj(e?zDQ$?l%PM_Pm*3GpxlBeZlgo8J1WROcK4*VMnumtQ^aS(> z4UK*U`Bn?L{F_!>ytJRn-z$^JU)r9y?%VSJhps=^{>J+Wb<95AW7G?H7yEV9`;RKv zf7FHgWA^`{{{CBX{ePw%&3O3#HOGDu_|I%qzsYO#le~YQu5H}br#}=Tc$N%*K|fZq znol4hN%V4p*uyO}^%=z1k6+7B<5n}&XR9C<-=BcERzh_ijwxzlZM0HXt!E4(bex2e z%sJwwB{~Gsr>JS$MEIPRje|4t1f`I-9f0%i;kE{AT+Dx(WL*QSRqNc>bS+f!m&0slvGhrKsM`N2}6T~}Wy9Xy6L!dnYeepzr zBz1<^_yO<_^xdN|eXxz-=QR}6#jyuCW{D5+2YA0^K2*ql4}EZo;;6pt1GdRD`$HTX zgv%rg^2huG{fC@<++#TUX1%!jn(xTjg9MToK}^669PoU2ejd%jrznTN>e zmM-Wg3Gx@I^nHUEH?}nGyc}Pk z{~GaFpG zV|(!U>M~l$?uPN@7LTtrI>nXW{rj5!KjP$r=Lfu>G41m9+XT_3ACHk2aQ+@kC+<1T z7ZIe%1c6TKIDt;`c!5p|;IJAg69l?P=8E-8K|gPD`subs1pL#KTO3U~EK`AUTmio^ 
[binary patch data elided]

+
+
+
+ {% if fic.completed %} Finished {% else %} {% if fic.failure %} Failed {% else %} Working... {% endif %} {% endif %} - Fanfiction Downloader
+
+
+ {% if not fic.completed and not fic.failure %}
+
+ {% endif %}
+
+
+
+
    +

    + FanFiction Downloader +

    +
    + + +
    + +
    + {% if fic.url %} +
    +

    + {% if fic.completed %} +

    Your fic has finished processing and you can download it now.

+ Download {{ fic.title }}
+ by {{ fic.author }} ({{ fic.format }})
+ {% endif %}
+ {% if fic.failure %}
+ {{ fic.failure }}
+ {% endif %}
+ {% if not fic.completed and not fic.failure %}
+

    Not done yet. This page will periodically poll to see if your story has finished.

+ Processing {{ fic.title }}
+ by {{ fic.author }} ({{ fic.format }})
+ {% endif %}
+ Source
+ {% if fic.completed and escaped_url %}
+ Convert
+ {% endif %}
+

    +
    + {% endif %} +

    See your personal list of previously downloaded fanfics.

    +

    Please Help Test New Version

    +

+ We have a new, more efficient version of the system up
+ for testing. Please try the
+ Testing
+ Version here.
+

    +
    +
    + Powered by Google App Engine +

    + FanfictionLoader is a web front-end to fanficdownloader
    + Copyright © Fanficdownloader team +
    + +
    + + +
    +
+
+
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 00000000..4c48b5ac
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/utils/remover.py b/utils/remover.py
new file mode 100644
index 00000000..23e4b9bc
--- /dev/null
+++ b/utils/remover.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# encoding: utf-8
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+remover.py
+
+Created by Roman on 2010-06-20.
+Copyright 2011 Fanficdownloader team
+"""
+
+import datetime
+import logging
+
+from google.appengine.ext.webapp import util
+from google.appengine.ext import webapp
+from google.appengine.api import users
+from google.appengine.api import taskqueue
+from google.appengine.api import memcache
+
+from ffstorage import *
+
+class Remover(webapp.RequestHandler):
+    def get(self):
+        logging.debug("Starting r3m0v3r")
+        user = users.get_current_user()
+        logging.debug("Working as user %s" % user)
+        theDate = datetime.datetime.now() - datetime.timedelta(days=3)
+        logging.debug("Will delete stuff older than %s" % theDate)
+
+        fics = DownloadMeta.all()
+        fics.filter("date <",theDate).order("date")
+
+        results = fics.fetch(100)
+        logging.debug([x.name for x in results])
+
+        num=0
+        for d in results:
+            d.delete()
+            for c in d.data_chunks:
+                c.delete()
+            num += 1
+            logging.debug('Delete '+d.url)
+
+        logging.info('Deleted instances: %d' % num)
+        self.response.out.write('Deleted instances: %d' % num)
+
+class RemoveOrphanDataChunks(webapp.RequestHandler):
+
+    def get(self):
+        logging.debug("Starting RemoveOrphanDataChunks")
+        user = users.get_current_user()
+        logging.debug("Working as user %s" % user)
+
+        ## Can't search for all chunks in web req because it's too
+        ## long. Can't do it in a queue task, because it's still too
+        ## long. Can't try ordering by id or download because the ids
+        ## are not increasing. Instead, use a saved cursor to walk
+        ## all the way through over time, then starting at the top
+        ## again when finished.
+
+        chunks = DownloadData.all()
+
+        cursor = memcache.get('orphan_search_cursor')
+        if cursor:
+            chunks.with_cursor(cursor)
+
+        deleted = 0
+        num = 0
+        step = 100
+        results = chunks.fetch(step)
+        for d in results:
+            ## This is the only way to test for orphans I could find.
+            try:
+                meta = d.download
+            except db.ReferencePropertyResolveError:
+                ## delete orphan chunk.
+                d.delete()
+                deleted += 1
+            num += 1
+        if num < step:
+            memcache.delete('orphan_search_cursor')
+            logging.warn('Orphan search reached end, starting over next time.')
+        else:
+            memcache.set('orphan_search_cursor',chunks.cursor())
+
+        logging.info('Deleted %d orphan chunks from %d total.' % (deleted,num))
+        self.response.out.write('Deleted %d orphan chunks from %d total.' % (deleted,num))
+
+def main():
+    application = webapp.WSGIApplication([('/r3m0v3r', Remover),
+                                          ('/r3m0v3rOrphans', RemoveOrphanDataChunks)],
+                                         debug=False)
+    util.run_wsgi_app(application)
+
+
+if __name__ == '__main__':
+    logging.getLogger().setLevel(logging.DEBUG)
+    main()
diff --git a/utils/tally.py b/utils/tally.py
new file mode 100644
index 00000000..7c1b6f0f
--- /dev/null
+++ b/utils/tally.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# encoding: utf-8
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import datetime
+import logging
+
+from google.appengine.ext.webapp import util
+from google.appengine.ext import webapp
+from google.appengine.api import users
+from google.appengine.api import taskqueue
+from google.appengine.api import memcache
+
+from ffstorage import *
+
+class Tally(webapp.RequestHandler):
+    def get(self):
+        logging.debug("Starting Tally")
+        user = users.get_current_user()
+        logging.debug("Working as user %s" % user)
+
+        fics = DownloadMeta.all()
+
+        cursor = memcache.get('tally_search_cursor')
+        if cursor:
+            fics.with_cursor(cursor)
+
+        self.response.out.write('"user","url","name","title","author","format","failure","completed","date","version"\n')
+        num = 0
+        step = 500
+        results = fics.fetch(step)
+        for d in results:
+            self.response.out.write('"%s","%s","%s","%s","%s","%s","%s","%s","%s","%s"\n' %
+                                    (d.user,d.url,d.name,d.title,d.author,
+                                     d.format,d.failure,d.completed,d.date,
+                                     d.version))
+            num += 1
+        if num < step:
+            memcache.delete('tally_search_cursor')
+            logging.warn('Tally search reached end, starting over next time.')
+        else:
+            memcache.set('tally_search_cursor',fics.cursor())
+
+        logging.info('Tallied %d fics.' % num)
+        self.response.out.write('Tallied %d fics.' % num)
+
+def main():
+    application = webapp.WSGIApplication([('/tally', Tally),
+                                          ],
+                                         debug=False)
+    util.run_wsgi_app(application)
+
+
+if __name__ == '__main__':
+    logging.getLogger().setLevel(logging.DEBUG)
+    main()

From 9b98cb8cce1928c7f1c85e09db9836bc33d24533 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Mon, 21 Nov 2011 14:07:49 -0600
Subject: [PATCH 225/482] Warning that AppEngine Python2.7 has a bug fetching
 large reqs.

---
 index.html | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/index.html b/index.html
index a47a44ef..94a4b2a3 100644
--- a/index.html
+++ b/index.html
@@ -64,6 +64,13 @@
 multithreading to try and reduce our usage. Google considers
 Python 2.7 Experimental still, so there may be issues.

    +

+ One issue that has already cropped up is an AppEngine/Python 2.7 bug with
+ fetching large results from datastore. For very large
+ stories (~>270k words), you'll need to use
+ the Previous
+ Version until this bug is fixed.
+

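The r3m0v3r and tally handlers added above share one batching idiom worth noting: fetch a bounded batch, park the datastore query cursor in memcache between requests, and reset to the top once a batch comes back short. A minimal sketch of that pattern, with hypothetical names (walk_batch, process) rather than the handlers' own:

    from google.appengine.api import memcache

    def walk_batch(query, cursor_key, step, process):
        # Resume from wherever the previous request stopped.
        cursor = memcache.get(cursor_key)
        if cursor:
            query.with_cursor(cursor)

        results = query.fetch(step)
        for entity in results:
            process(entity)

        if len(results) < step:
            # A short batch means we hit the end; start over next time.
            memcache.delete(cursor_key)
        else:
            # Remember our place for the next request.
            memcache.set(cursor_key, query.cursor())

    # e.g. walk_batch(DownloadData.all(), 'orphan_search_cursor', 100, check_orphan)

Because each web request only touches one batch, no single request can blow the request deadline, at the cost of a full sweep taking many invocations.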
 If you have any problems with this application, please report them in

From f9f7c6ea2a108d862f2188f94359e2947dc3a79f Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Wed, 14 Dec 2011 11:08:26 -0600
Subject: [PATCH 226/482] Twisting the Hellmouth epubs are .html, not .xhtml.
 Bump stored version to 4.1.

---
 epubmerge.py              | 2 +-
 fanficdownloader/story.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/epubmerge.py b/epubmerge.py
index a9c51933..99e25238 100644
--- a/epubmerge.py
+++ b/epubmerge.py
@@ -216,7 +216,7 @@ def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None,
                 try:
                     outputepub.writestr(href, epub.read(relpath+item.getAttribute("href")))
-                    if re.match(r'.*/(file|chapter)\d+\.xhtml',href):
+                    if re.match(r'.*/(file|chapter)\d+\.x?html',href):
                         filecount+=1
                     items.append((id,href,item.getAttribute("media-type")))
                     filelist.append(href)
diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py
index ca48cde7..68b24be1 100644
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@@ -25,7 +25,7 @@ class Story:
         try:
             self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
         except:
-            self.metadata = {'version':'4.0'}
+            self.metadata = {'version':'4.1'}
         self.chapters = [] # chapters will be tuples of (title,html)
         self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.

From 6e41e5d59abfb9531aafd9e29fa9270a006c2f83 Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Wed, 14 Dec 2011 11:54:28 -0600
Subject: [PATCH 227/482] Add background_color for html/epub, change notice,
 bump to 4-1-1.

---
 app.yaml                                | 2 +-
 defaults.ini                            | 4 ++++
 fanficdownloader/writers/writer_epub.py | 9 ++++++---
 fanficdownloader/writers/writer_html.py | 4 ++++
 index.html                              | 17 ++++++++---------
 5 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/app.yaml b/app.yaml
index fc1d1ebc..852b0641 100644
--- a/app.yaml
+++ b/app.yaml
@@ -1,6 +1,6 @@
 # ffd-retief-hrd fanfictiondownloader
 application: fanfictiondownloader
-version: 4-1-0
+version: 4-1-1
 runtime: python27
 api_version: 1
 threadsafe: true
diff --git a/defaults.ini b/defaults.ini
index d591fb04..713caffa 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -113,6 +113,10 @@ extratags: FanFiction
 ## Primarily for commandline.
 #slow_down_sleep_time:0.5

+## output background color--only used by html and epub (and ignored in
+## epub by many readers). Must be hex code, # will be added.
+background_color: ffffff
+
 ## Each output format has a section that overrides [defaults]

 [html]
diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index ec9b11dc..7b3fac2f 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -41,7 +41,10 @@ class EpubWriter(BaseStoryWriter):
     def __init__(self, config, story):
         BaseStoryWriter.__init__(self, config, story)

-        self.EPUB_CSS='''body { margin-left: 2%; margin-right: 2%; margin-top: 2%; margin-bottom: 2%; text-align: justify; }
+        self.EPUB_CSS = string.Template('''
+body { margin: 2%;
+       text-align: justify;
+       background-color: #${background_color}; }
 pre { font-size: x-small; }
 sml { font-size: small; }
 h1 { text-align: center; }
 h2 { text-align: center; }
 h3 { text-align: center; }
 h4 { text-align: center; }
 h5 { text-align: center; }
 h6 { text-align: center; }
 .CI {
     text-align:center;
     margin-top:0px;
     margin-bottom:0px;
     padding:0px;
     }
 .center {text-align: center;}
 .cover {text-align: center;}
 .full {width: 100%; }
 .quarter {width: 25%; }
 .smcap {font-variant: small-caps;}
 .u {text-decoration: underline;}
 .bold {font-weight: bold;}
-'''
+''')

         self.EPUB_TITLE_PAGE_START = string.Template('''
@@ -376,7 +379,7 @@ h6 { text-align: center; }
         del tocncxdom

         # write stylesheet.css file.
-        outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS)
+        outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS.substitute({"background_color":self.getConfig("background_color")}))

         # write title page.
         if self.getConfig("titlepage_use_table"):
diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py
index 32d27d62..cec520a1 100644
--- a/fanficdownloader/writers/writer_html.py
+++ b/fanficdownloader/writers/writer_html.py
@@ -39,6 +39,7 @@ class HTMLWriter(BaseStoryWriter):
 ${title} by ${author}
@@ -95,9 +82,6 @@
 body { background-color: #${background_color}; }
     def writeStoryImpl(self, out):
-        # minor cheat, tucking bg into metadata.
-        if self.getConfig("background_color"):
-            self.story.metadata["background_color"] = self.getConfig("background_color")
         self._write(out,self.HTML_FILE_START.substitute(self.story.metadata))

         self.writeTitlePage(out,
diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py
index bb141c65..d6ced534 100644
--- a/fanficdownloader/writers/writer_mobi.py
+++ b/fanficdownloader/writers/writer_mobi.py
@@ -41,7 +41,6 @@ class MobiWriter(BaseStoryWriter):
 ${title} by ${author}
-

    ${title} by ${author}

@@ -64,7 +63,6 @@ class MobiWriter(BaseStoryWriter):
 ${title} by ${author}
-

    ${title} by ${author}

@@ -91,7 +89,6 @@ class MobiWriter(BaseStoryWriter):
 ${title} by ${author}
-
@@ -113,7 +110,6 @@ class MobiWriter(BaseStoryWriter):
 ${chapter}
-

    ${chapter}

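The writer changes above all hang off one mechanism: the CSS/HTML boilerplate becomes a string.Template, and configured values are injected with substitute(). A standalone sketch of just that mechanism; the 'ffffff' value stands in for whatever getConfig("background_color") would return:

    import string

    # Placeholder syntax matches the EPUB_CSS template in the patch above.
    css_template = string.Template(
        'body { margin: 2%;\n'
        '       text-align: justify;\n'
        '       background-color: #${background_color}; }\n')

    # Substitution fills in the configured hex code at write time.
    print(css_template.substitute({'background_color': 'ffffff'}))

Keeping the color out of the literal CSS is what lets a single template serve every configured background without string slicing.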
diff --git a/index.html b/index.html
index a498875f..d96a0ac5 100644
--- a/index.html
+++ b/index.html
@@ -54,34 +54,15 @@
 much easier.

    -

    Support for 'Series'

    +

    Support for Custom CSS

    - We now collect 'Series' name and number for the sites: - harrypotterfanfiction.com, - potionsandsnitches.net, - adastrafanfic.com, - whofic.com, - fanfiction.tenhawkpresents.com, - castlefans.org, - tthfanfic.org, - www.siye.co.uk, - twilighted.net*, - twilighted.net* and - thewriterscoffeeshop.com*. -

    -

    - * The last three use series as reading lists and stories collections as much as true story series, - so they default to not collect series info. You can turn it on in your User Configuration if you want. -

    -

    New Site: archiveofourown.org

    -

    - Thanks to Ida Leter for writing the code to support a new site: archiveofourown.org. + The CSS included in the HTML and EPUB output formats is now a customizable parameter.
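As a concrete illustration of that parameter, a personal.ini override might look like the sketch below; the section layout and the %%/leading-space rules come from the plugin-defaults.ini hunks later in this series, while the sepia color value is purely illustrative:

    [epub]
    background_color: f4ecd8
    output_css:
     body { background-color: #%(background_color)s;
            text-align: justify;
            margin: 2%%; }
     h1 { text-align: center; }

Note that background_color must live in the same section as output_css for the %(background_color)s interpolation to resolve.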

    If you have any problems with this application, please report them in the FanFictionDownLoader Google Group. The - Previous Version is also available for you to use if necessary. + Previous Version is also available for you to use if necessary.

    {{ error_message }} diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 3cc6ab14..251ce3b5 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -114,6 +114,37 @@ background_color: ffffff ## Each output format has a section that overrides [defaults] [html] +## output background color--only used by html and epub (and ignored in +## epub by many readers). Included below in output_css--will be +## ignored if not in output_css. +background_color: ffffff + +## Allow customization of CSS. Make sure to keep at least one space +## at the start of each line and to escape % to %%. Also need +## background_color to be in the same section, if included in CSS. +output_css: + body { background-color: #%(background_color)s; } + .CI { + text-align:center; + margin-top:0px; + margin-bottom:0px; + padding:0px; + } + .center {text-align: center;} + .cover {text-align: center;} + .full {width: 100%%; } + .quarter {width: 25%%; } + .smcap {font-variant: small-caps;} + .u {text-decoration: underline;} + .bold {font-weight: bold;} + +[txt] +## Add URLs since there aren't links. +titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description + +## use \r\n for line endings, the windows convention. text output only. +windows_eol: true + [txt] ## Add URLs since there aren't links. titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description @@ -133,6 +164,40 @@ titlepage_use_table: false ## When using tables, make these span both columns. wide_titlepage_entries: description, storyUrl, author URL +## output background color--only used by html and epub (and ignored in +## epub by many readers). Included below in output_css--will be +## ignored if not in output_css. +background_color: ffffff + +## Allow customization of CSS. Make sure to keep at least one space +## at the start of each line and to escape % to %%. Also need +## background_color to be in the same section, if included in CSS. +output_css: + body { background-color: #%(background_color)s; + text-align: justify; + margin: 2%%; } + pre { font-size: x-small; } + sml { font-size: small; } + h1 { text-align: center; } + h2 { text-align: center; } + h3 { text-align: center; } + h4 { text-align: center; } + h5 { text-align: center; } + h6 { text-align: center; } + .CI { + text-align:center; + margin-top:0px; + margin-bottom:0px; + padding:0px; + } + .center {text-align: center;} + .cover {text-align: center;} + .full {width: 100%%; } + .quarter {width: 25%%; } + .smcap {font-variant: small-caps;} + .u {text-decoration: underline;} + .bold {font-weight: bold;} + [mobi] ## mobi TOC cannot be turned off right now. #include_tocpage: true From dac306d0ba929bd0e6a1d96c19cf8f9d38b5ff98 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 11 Feb 2012 13:40:41 -0600 Subject: [PATCH 338/482] replace_metadata feature--allow regexp replacement of story metadata from ini. 
--- calibre-plugin/ffdl_plugin.py | 30 +++++++++++----------- defaults.ini | 13 ++++++++++ downloader.py | 6 ++--- fanficdownloader/adapters/adapter_test1.py | 3 ++- fanficdownloader/story.py | 26 ++++++++++++++++--- fanficdownloader/writers/base_writer.py | 3 +++ plugin-defaults.ini | 14 ++++++++++ 7 files changed, 72 insertions(+), 23 deletions(-) diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index fddf86e5..d1254216 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -318,13 +318,11 @@ class FanFictionDownLoaderPlugin(InterfaceAction): self._update_existing_2, init_label="Collecting stories for update...", win_title="Get stories for updates", - status_prefix="URL retrieved") - + status_prefix="URL retrieved") #books = self._convert_calibre_ids_to_books(db, book_ids) #print("update books:%s"%books) - ## XXX split here. def _update_existing_2(self,book_list): d = UpdateExistingDialog(self.gui, @@ -370,10 +368,11 @@ class FanFictionDownLoaderPlugin(InterfaceAction): self.gui.status_bar.show_message(_('Started fetching metadata for %s stories.'%len(books)), 3000) - LoopProgressDialog(self.gui, - books, - partial(self.get_metadata_for_book, options = options), - partial(self.start_download_list, options = options)) + if 0 < len(filter(lambda x : x['good'], books)): + LoopProgressDialog(self.gui, + books, + partial(self.get_metadata_for_book, options = options), + partial(self.start_download_list, options = options)) # LoopProgressDialog calls get_metadata_for_book for each 'good' story, # get_metadata_for_book updates book for each, # LoopProgressDialog calls start_download_list at the end which goes @@ -689,14 +688,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction): total_good = len(good_list) self.gui.status_bar.show_message(_('Adding/Updating %s books.'%total_good)) - - LoopProgressDialog(self.gui, - good_list, - partial(self._update_book, options=options, db=self.gui.current_db), - partial(self._update_books_completed, options=options), - init_label="Updating calibre for stories...", - win_title="Update calibre for stories", - status_prefix="Updated") + + if total_good > 0: + LoopProgressDialog(self.gui, + good_list, + partial(self._update_book, options=options, db=self.gui.current_db), + partial(self._update_books_completed, options=options), + init_label="Updating calibre for stories...", + win_title="Update calibre for stories", + status_prefix="Updated") def _add_or_update_book(self,book,options,prefs,mi=None): db = self.gui.current_db diff --git a/defaults.ini b/defaults.ini index a8323935..86d583c4 100644 --- a/defaults.ini +++ b/defaults.ini @@ -133,6 +133,19 @@ extratags: FanFiction ## values are available, plus output_filename. #post_process_cmd: addbook -f "${output_filename}" -t "${title}" +## Use regular expressions to find and replace (or remove) metadata. +## For example, you could change Sci-Fi=>SF, remove *-Centered tags, +## etc. See http://docs.python.org/library/re.html (look for re.sub) +## for regexp details. +## Make sure to keep at least one space at the start of each line and +## to escape % to %%, if used. 
+#replace_metadata: +# Sci-Fi=>SF +# Puella Magi Madoka Magica.* => Madoka +# Comedy=>Humor +# Crossover: (.*)=>\1 +# (.*)Great(.*)=>\1Moderate\2 +# .*-Centered=> ## Each output format has a section that overrides [defaults] [html] diff --git a/downloader.py b/downloader.py index 4bfcf792..70262c49 100644 --- a/downloader.py +++ b/downloader.py @@ -25,7 +25,6 @@ from StringIO import StringIO from optparse import OptionParser import getpass import string -import time from subprocess import call @@ -215,6 +214,7 @@ def main(): print us if __name__ == "__main__": - start = time.time() + #import time + #start = time.time() main() - print("Total time seconds:%f"%(time.time()-start)) + #print("Total time seconds:%f"%(time.time()-start)) diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 86e5aa6c..9ed33178 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -104,7 +104,8 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" self.story.addToList('category','Harry Potter') self.story.addToList('category','Furbie') self.story.addToList('category','Crossover') - + self.story.addToList('category',u'Puella Magi Madoka Magica/魔法少女まどか★マギカ') + self.story.addToList('category',u'Magical Girl Lyrical Nanoha') self.story.addToList('genre','Fantasy') self.story.addToList('genre','SF') self.story.addToList('genre','Noir') diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index b936cb20..81336b59 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -15,7 +15,7 @@ # limitations under the License. # -import os +import os, re from htmlcleanup import conditionalRemoveEntities, removeAllEntities @@ -26,6 +26,7 @@ class Story: self.metadata = {'version':os.environ['CURRENT_VERSION_ID']} except: self.metadata = {'version':'4.3'} + self.replacements = [] self.chapters = [] # chapters will be tuples of (title,html) self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists. 
@@ -36,6 +37,12 @@ class Story: def getMetadataRaw(self,key): if self.metadata.has_key(key): return self.metadata[key] + + def doReplacments(self,value): + for (p,v) in self.replacements: + if (isinstance(value,str) or isinstance(value,unicode)) and re.match(p,value): + value = re.sub(p,v,value) + return value; def getMetadata(self, key, removeallentities=False): value = None @@ -50,7 +57,8 @@ class Story: value = value.strftime("%Y-%m-%d %H:%M:%S") if key == "datePublished" or key == "dateUpdated": value = value.strftime("%Y-%m-%d") - + + value=self.doReplacments(value) if removeallentities and value != None: return removeAllEntities(value) else: @@ -81,10 +89,14 @@ class Story: def getList(self,listname): if not self.listables.has_key(listname): return [] - return self.listables[listname] + return filter( lambda x : x!=None and x!='' , + map(self.doReplacments,self.listables[listname]) ) def getLists(self): - return self.listables + lsts = {} + for ln in self.listables.keys(): + lsts[ln] = self.getList(ln) + return lsts def addChapter(self, title, html): self.chapters.append( (title,html) ) @@ -96,6 +108,12 @@ class Story: def __str__(self): return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters) + def setReplace(self,replace): + for line in replace.splitlines(): + if "=>" in line: + print("line:%s"%line) + self.replacements.append(map( lambda x: x.strip(), line.split("=>") )) + def commaGroups(s): groups = [] while s and s[-1].isdigit(): diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index 121f943d..de1514bd 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -46,6 +46,9 @@ class BaseStoryWriter(Configurable): self.adapter = adapter self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially. + + self.story.setReplace(self.getConfig('replace_metadata')) + self.validEntries = [ 'category', 'genre', diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 251ce3b5..3152ed6e 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -111,6 +111,20 @@ extratags: FanFiction ## epub by many readers). Must be hex code, # will be added. background_color: ffffff +## Use regular expressions to find and replace (or remove) metadata. +## For example, you could change Sci-Fi=>SF, remove *-Centered tags, +## etc. See http://docs.python.org/library/re.html (look for re.sub) +## for regexp details. +## Make sure to keep at least one space at the start of each line and +## to escape % to %%, if used. +#replace_metadata: +# Sci-Fi=>SF +# Puella Magi Madoka Magica.* => Madoka +# Comedy=>Humor +# Crossover: (.*)=>\1 +# (.*)Great(.*)=>\1Moderate\2 +# .*-Centered=> + ## Each output format has a section that overrides [defaults] [html] From d001799372c0c6227435a3a9dc959a6f0db3e279 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 12 Feb 2012 15:21:55 -0600 Subject: [PATCH 339/482] Plugin-Make ini edit courier font & 1pt larger than default. --- calibre-plugin/__init__.py | 2 +- calibre-plugin/config.py | 17 ++++++++++++----- index.html | 8 ++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 96593896..8a770951 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' 
supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 4, 0) + version = (1, 4, 1) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 4ec189e8..14693b61 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' import traceback, copy -from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, +from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QFont, QTextEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QVariant) from calibre.gui2 import dynamic, info_dialog @@ -58,10 +58,7 @@ copylist = ['personal.ini', 'updatedefault', 'fileform', 'collision', - 'deleteotherforms', - 'addtolists', - 'addtoreadlists', - 'addtolistsonread'] + 'deleteotherforms'] # fake out so I don't have to change the prefs calls anywhere. The # Java programmer in me is offended by op-overloading, but it's very @@ -293,6 +290,11 @@ class PersonalIniTab(QWidget): self.l.addWidget(self.label) self.ini = QTextEdit(self) + try: + self.ini.setFont(QFont("Courier", + self.plugin_action.gui.font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) self.ini.setLineWrapMode(QTextEdit.NoWrap) self.ini.setText(prefs['personal.ini']) self.l.addWidget(self.ini) @@ -324,6 +326,11 @@ class ShowDefaultsIniDialog(QDialog): self.ini = QTextEdit(self) self.ini.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + try: + self.ini.setFont(QFont("Courier", + get_gui().font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) self.ini.setLineWrapMode(QTextEdit.NoWrap) self.ini.setText(text) self.ini.setReadOnly(True) diff --git a/index.html b/index.html index d96a0ac5..28a3b515 100644 --- a/index.html +++ b/index.html @@ -58,6 +58,14 @@

    The CSS included in the HTML and EPUB output formats is now a customizable parameter.

    +

    Support for Custom Replacement of Metadata

    +

    + There's now a customizable parameter to include a list of regular expressions to replace metadata as you see fit. +

    +

    + Examples of how to use both new features can be found in the + plugin forum. +
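Concretely, each non-comment replace_metadata line is split on "=>" into a (pattern, replacement) pair, and a value is rewritten with re.sub whenever re.match hits, as in the story.py hunk of patch 338 above. A rough standalone sketch of that behavior, with illustrative function names rather than the plugin's own API:

    import re

    def parse_replacements(config_text):
        # One "pattern=>replacement" pair per line, as in the ini examples.
        pairs = []
        for line in config_text.splitlines():
            if "=>" in line:
                pattern, repl = [part.strip() for part in line.split("=>")]
                pairs.append((pattern, repl))
        return pairs

    def apply_replacements(value, pairs):
        for pattern, repl in pairs:
            if re.match(pattern, value):
                value = re.sub(pattern, repl, value)
        return value

    pairs = parse_replacements("Sci-Fi=>SF\n.*-Centered=>")
    print(apply_replacements("Sci-Fi", pairs))          # -> "SF"
    print(apply_replacements("Harry-Centered", pairs))  # -> "" (tag removed)

An empty right-hand side therefore deletes matching tags outright, which is why the lists in story.py filter out empty strings after replacement.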

    If you have any problems with this application, please report them in From 5acf21119ace2571e16f8b8047a0c1cb2c159eb1 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 12 Feb 2012 15:22:14 -0600 Subject: [PATCH 340/482] Added tag calibre-plugin-1.4.1 for changeset c5c2166ebbc4 From af65982c9a10e0e68f1a62ae57736789daa4c285 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 12 Feb 2012 15:23:16 -0600 Subject: [PATCH 341/482] Added tag FanFictionDownLoader-4.3.2 for changeset c5c2166ebbc4 From ed004e8637dd24ded882c8c555faf6c6597ea554 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 13 Feb 2012 19:39:09 -0600 Subject: [PATCH 342/482] Fix entity removal to recoginize hex correctly. --- fanficdownloader/htmlcleanup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py index 7e91f190..d9e2d848 100644 --- a/fanficdownloader/htmlcleanup.py +++ b/fanficdownloader/htmlcleanup.py @@ -27,7 +27,7 @@ def _unirepl(match): return unichr(value) def _replaceNumberEntities(data): - p = re.compile(r'&#(x?)(\d+);') + p = re.compile(r'&#(x?)([0-9a-fA-F]+);') return p.sub(_unirepl, data) def _replaceNotEntities(data): From 50c1cd2d1de7eb9076988c03779baa304e4c5ff6 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 13 Feb 2012 19:40:11 -0600 Subject: [PATCH 343/482] Bump plugin version. --- calibre-plugin/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 8a770951..36ea376b 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 4, 1) + version = (1, 4, 2) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code From 994b5aa6762e967281325cf3d4ff580f21b1ec0f Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 13 Feb 2012 19:40:22 -0600 Subject: [PATCH 344/482] Added tag calibre-plugin-1.4.2 for changeset 1e41ecdceb9c From 986c7181a2605248997ffe4decc9117e769b7f08 Mon Sep 17 00:00:00 2001 From: ia6eia Date: Tue, 14 Feb 2012 08:17:45 +0000 Subject: [PATCH 345/482] v1 of adapter for ficbook.net --- .../adapters/adapter_ficbooknet.py | 206 ++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 fanficdownloader/adapters/adapter_ficbooknet.py diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py new file mode 100644 index 00000000..61fd8852 --- /dev/null +++ b/fanficdownloader/adapters/adapter_ficbooknet.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import datetime +import logging +import re +import urllib2 +from .. import translit + +from .. 
import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + + +def getClass(): + return FicBookNetAdapter + + +class FicBookNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','fbn') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %m %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.ficbook.net' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/readfic/12345" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/readfic/")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + url=self.url + logging.debug("URL: "+url) + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. + + table = soup.find('td',{'width':'50%'}) + + ## Title + a = soup.find('h1') + self.story.setMetadata('title',a.string) + logging.debug("Title: (%s)"%self.story.getMetadata('title')) + + # Find authorid and URL from... author url. 
+ a = table.find('a') + self.story.setMetadata('authorId',a.text) # Author's name is unique + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.text) + logging.debug("Author: (%s)"%self.story.getMetadata('author')) + + # Find the chapters: + chapters = soup.find('div', {'class' : 'part_list'}) + if chapters != None: + chapters=chapters.findAll('a', href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+"/\d+#part_content$")) + self.story.setMetadata('numChapters',len(chapters)) + for x in range(0,len(chapters)): + chapter=chapters[x] + churl='http://'+self.host+chapter['href'] + self.chapterUrls.append((stripHTML(chapter),churl)) + if x == 0: + pubdate = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) + if x == len(chapters)-1: + update = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + self.story.setMetadata('numChapters',1) + pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span'))) + update=pubdate + + logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + + if not ',' in pubdate: + pubdate=datetime.date.today().strftime(self.dateformat) + if not ',' in update: + update=datetime.date.today().strftime(self.dateformat) + pubdate=pubdate.split(',')[0] + update=update.split(',')[0] + + fullmon = {"yanvarya":"01", "fievralya":"02", "marta":"03", "aprielya":"04", "maya":"05", + "iyunya":"06","iyulya":"07", "avghusta":"08", "sentyabrya":"09", "oktyabrya":"10", + "noyabrya":"11", "diekabrya":"12" } + for (name,num) in fullmon.items(): + if name in pubdate: + pubdate = pubdate.replace(name,num) + if name in update: + update = update.replace(name,num) + + self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat)) + self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat)) + + pr=soup.find('a', href=re.compile(r'/printfic/\w+')) + pr='http://'+self.host+pr['href'] + pr = bs.BeautifulSoup(self._fetchUrl(pr)) + pr=pr.findAll('div', {'class' : 'part_text'}) + i=0 + for part in pr: + i=i+len(stripHTML(part).split(' ')) + self.story.setMetadata('numWords', str(i)) + + + fandoms = table.find('a', href=re.compile(r'/fanfiction/\w+')) + self.story.addToList('category',fandoms.string) + + meta=table.findAll('a', href=re.compile(r'/ratings/')) + i=0 + for m in meta: + if i == 0: + self.story.setMetadata('rating', m.find('b').text) + i=1 + elif i == 1: + if not "," in m.nextSibling: + i=2 + self.story.addToList('genre', m.find('b').text) + elif i == 2: + self.story.addToList('warnings', m.find('b').text) + + + if table.find('span', {'style' : 'color: green'}): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In Progress') + + + tags = table.findAll('b') + for tag in tags: + label = translit.translit(tag.text) + if 'Piersonazhi:' in label: + chars=tag.nextSibling.string.split(', ') + for char in chars: + self.story.addToList('characters',char) + break + + summary=soup.find('span', {'class' : 'urlize'}) + self.story.setMetadata('description', summary.text) + + # grab the text for an individual chapter. 
+ def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + soup = soup.find('div', {'class' : 'public_beta'}) + + + if None == soup: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(soup) From b69ede76bbb3a714058a7bd687594d7fcf3dba79 Mon Sep 17 00:00:00 2001 From: ia6eia Date: Tue, 14 Feb 2012 08:20:24 +0000 Subject: [PATCH 346/482] Something to assist with non-latin (cyrillic) websites. --- fanficdownloader/translit.py | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 fanficdownloader/translit.py diff --git a/fanficdownloader/translit.py b/fanficdownloader/translit.py new file mode 100644 index 00000000..bf205a6d --- /dev/null +++ b/fanficdownloader/translit.py @@ -0,0 +1,57 @@ +#-*-coding:utf-8-*- +# Code taken from http://python.su/forum/viewtopic.php?pid=66946 +import unicodedata +def is_syllable(letter): + syllables = ("A", "E", "I", "O", "U", "a", "e", "i", "o", "u") + if letter in syllables: + return True + return False +def is_consonant(letter): + return not is_syllable(letter) +def romanize(letter): + try: + str(letter) + except UnicodeEncodeError: + pass + else: + return str(letter) + unid = unicodedata.name(letter) + exceptions = {"NUMERO SIGN": "No", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK": "\"", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK": "\"", "DASH": "-"} + for name_contains in exceptions: + if unid.find(name_contains)!=-1: + return exceptions[name_contains] + assert(unid.startswith("CYRILLIC"))# Not ready to romanize anything but cyrillics + transformation_pairs = {"CYRILLIC CAPITAL LETTER ": str.capitalize, "CYRILLIC SMALL LETTER ": str.lower} + func = str.lower + for name_contains in transformation_pairs: + if unid.find(name_contains)!=-1: + func = transformation_pairs[name_contains] + unid = unid.replace(name_contains, "") + cyrillic_exceptions = {"YERU": "y", "SHORT I": "y", "HARD SIGN": "\'", "SOFT SIGN": "\'", "BYELORUSSIAN-UKRAINIAN I": "i", "GHE WITH UPTURN": "g", "UKRAINIAN IE": "ie", "YU": "yu", "YA": "ya"} + for name_contains in cyrillic_exceptions: + if unid.find(name_contains)!=-1: + return cyrillic_exceptions[name_contains] + if all(map(is_syllable, unid)): + return func(unid) + else: + return func(filter(is_consonant, unid)) +def translit(text): + output = "" + for letter in text: + output += romanize(letter) + return output +#def main(): + #text = u"русск.: Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч." + #print translit(text) + #text = u"укр.: Гей, хлопці, не вспію - на ґанку ваша файна їжа знищується бурундучком." + #print translit(text) + #text = u"болг.: Ах, чудна българска земьо, полюшквай цъфтящи жита." + #print translit(text) + #text = u"серб.: Неуредне ноћне даме досађивале су Џеку К." + #print translit(text) + #russk.: Lyubya, s'iesh' shchiptsy, - vzdohniot mer, - kayf zhghuch. + #ukr.: Ghiey, hloptsi, nie vspiyu - na ganku vasha fayna yzha znishchuiet'sya burunduchkom. + #bolgh.: Ah, chudna b'lgharska ziem'o, polyushkvay ts'ftyashchi zhita. + #sierb.: Nieuriednie notshnie damie dosadjivalie su Dzhieku K. 
+if __name__=="__main__": + main() \ No newline at end of file From 7cf76b7a472e5f672a6774a1a7e57d9a6b28cac9 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 14 Feb 2012 22:31:13 -0600 Subject: [PATCH 347/482] Fix Characters parsing in ffnet, add Language metadata (ffnet only right now). --- calibre-plugin/config.py | 2 + calibre-plugin/ffdl_plugin.py | 52 ++++--------------- defaults.ini | 5 +- .../adapters/adapter_fanfictionnet.py | 13 +++-- fanficdownloader/adapters/adapter_test1.py | 12 ++++- fanficdownloader/story.py | 50 +++++++++++++++++- fanficdownloader/writers/base_writer.py | 2 + fanficdownloader/writers/writer_epub.py | 7 ++- plugin-defaults.ini | 5 +- 9 files changed, 94 insertions(+), 54 deletions(-) diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 14693b61..8e3d4dc6 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -445,6 +445,7 @@ permitted_values = { 'series' : ['series'], 'enumeration' : ['category', 'genre', + 'language', 'series', 'characters', 'status', @@ -477,6 +478,7 @@ permitted_values['comments'] = permitted_values['enumeration'] titleLabels = { 'category':'Category', 'genre':'Genre', + 'language':'Language', 'status':'Status', 'status-C':'Status:Completed', 'status-I':'Status:In-Progress', diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index d1254216..19003405 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -52,9 +52,6 @@ formmapping = { PLUGIN_ICONS = ['images/icon.png'] -sendlists = ["Send to Nook", "Send to Kindle", "Send to Droid", "Add to Nook", "Add to Kindle", "Add to Droid"] -readlists = ["000"] - class FanFictionDownLoaderPlugin(InterfaceAction): name = 'FanFictionDownLoader' @@ -742,11 +739,16 @@ class FanFictionDownLoaderPlugin(InterfaceAction): if len(filter( lambda x : not x.startswith("Last Update"), mi.tags)) > 0: old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags) # mi.tags needs to be list, but set kills dups. - mi.tags = list(set(list(old_tags)+mi.tags)) - # Set language english, but only if not already set. - oldmi = db.get_metadata(book_id,index_is_id=True) - if not oldmi.languages: - mi.languages=['eng'] + mi.tags = list(set(list(old_tags)+mi.tags)) + + if 'langcode' in book['all_metadata']: + mi.languages=[book['all_metadata']['langcode']] + else: + # Set language english, but only if not already set. + oldmi = db.get_metadata(book_id,index_is_id=True) + if not oldmi.languages: + mi.languages=['eng'] + db.set_metadata(book_id,mi) # do configured column updates here. @@ -971,40 +973,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): except: return None; - - -def get_job_details(job): - ''' - Convert the job result into a set of parameters including a detail message - summarising the success of the extraction operation. 
- This is used by both the threaded and worker approaches to extraction - ''' - extracted_ids, same_isbn_ids, failed_ids, no_format_ids = job.result - if not hasattr(job, 'html_details'): - job.html_details = job.details - det_msg = [] - for i, title in failed_ids: - if i in no_format_ids: - msg = title + ' (No formats)' - else: - msg = title + ' (ISBN not found)' - det_msg.append(msg) - if same_isbn_ids: - if det_msg: - det_msg.append('----------------------------------') - for i, title in same_isbn_ids: - msg = title + ' (Same ISBN)' - det_msg.append(msg) - if len(extracted_ids) > 0: - if det_msg: - det_msg.append('----------------------------------') - for i, title, last_modified, isbn in extracted_ids: - msg = '%s (Extracted %s)'%(title, isbn) - det_msg.append(msg) - - det_msg = '\n'.join(det_msg) - return extracted_ids, same_isbn_ids, failed_ids, det_msg - def get_url_list(urls): def f(x): if x.strip(): return True diff --git a/defaults.ini b/defaults.ini index 86d583c4..5b623f97 100644 --- a/defaults.ini +++ b/defaults.ini @@ -36,6 +36,7 @@ formatext_label:File Extension ## Sometimes there are multiple categories and/or genres. category_label:Category genre_label:Genre +language_label:Language characters_label:Characters series_label:Series ## Completed/In-Progress @@ -67,7 +68,7 @@ version_label:FFDL Version ## items to include in the title page ## Empty entries will *not* appear, even if in the list. ## All current formats already include title and author. -titlepage_entries: series,category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description +titlepage_entries: series,category,genre,language,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description ## Try to collect series name and number of this story in series. ## Some sites (ab)use 'series' for reading lists and personal @@ -176,7 +177,7 @@ output_css: [txt] ## Add URLs since there aren't links. -titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description +titlepage_entries: series,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description ## use \r\n for line endings, the windows convention. text output only. windows_eol: true diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index 29d65a1c..aa50d420 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -201,7 +201,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): metatext = a.findNext(text=re.compile(r' - Reviews:')) if metatext == None: # indicates there's no Reviews, look for id: instead. metatext = a.findNext(text=re.compile(r' - id:')) - #print("========= metatext:\n%s"%metatext) # after Rating, the same bit of text containing id:123456 contains # Complete--if completed. @@ -215,7 +214,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # # (fp) # - m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P.*?) )?(?:crossover )?(?:fan)?fiction(?:[ ]+with characters (?P.*?\.?)(?: & (?P.*?\.?))?\. )?", + # + m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P.*?) 
)?(?:crossover )?(?:fan)?fiction(?P[ ]+with characters)?", soup.find('meta',{'name':'description'})['content']) if m != None: genres=m.group('genres') @@ -225,7 +225,8 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): for g in genres.split('/'): self.story.addToList('genre',g) - if m.group('char1') != None: + if m.group('chars') != None: + # At this point we've proven that there's character(s) # We can't reliably parse characters out of meta name="description". # There's no way to tell that "with characters Ichigo K. & Neliel T. O./Nel. " ends at "Nel.", not "T." @@ -233,12 +234,16 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # reviewstext can take form of: # - English - Shinji H. - Updated: 01-13-12 - Published: 12-20-11 - id:7654123 # - English - Adventure/Angst - Ichigo K. & Neliel T. O./Nel - Reviews: + # - English - Humor/Adventure - Harry P. & Ironhide - Reviews: mc = re.match(r" - (?P[^ ]+ - )(?P[^ ]+ - )? (?P.+?) - (Reviews|Updated|Published)", metatext) chars = mc.group("chars") for c in chars.split(' & '): self.story.addToList('characters',c) - + m = re.match(r" - (?P[^ ]+)",metatext) + if m.group('lang') != None: + self.story.setMetadata('language',m.group('lang')) + return def getChapterText(self, url): diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 9ed33178..4fbd6021 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -90,6 +90,16 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" self.story.setMetadata('status','In-Progress') else: self.story.setMetadata('status','Completed') + + langs = { + 0:"English", + 1:"Russian", + 2:"French", + 3:"German", + } + if idnum < 10: + self.story.setMetadata('language',langs[idnum%len(langs)]) + # greater than 10, no language. self.setSeries('The Great Test',idnum) @@ -117,7 +127,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" ('Chapter 4',self.url+"&chapter=5"), ('Chapter 5',self.url+"&chapter=6"), ('Chapter 6',self.url+"&chapter=6"), - # ('Chapter 7',self.url+"&chapter=6"), + ('Chapter 7',self.url+"&chapter=6"), # ('Chapter 8',self.url+"&chapter=6"), # ('Chapter 9',self.url+"&chapter=6"), # ('Chapter 0',self.url+"&chapter=6"), diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 81336b59..ba3def4a 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -19,6 +19,50 @@ import os, re from htmlcleanup import conditionalRemoveEntities, removeAllEntities +# The list comes from ffnet, the only multi-language site we support +# at the time of writing. Values are taken largely from pycountry, +# but with some corrections and guesses. 
+langs = { + "English":"en", + "Spanish":"es", + "French":"fr", + "German":"de", + "Chinese":"zh", + "Japanese":"ja", + "Dutch":"nl", + "Portuguese":"pt", + "Russian":"ru", + "Italian":"it", + "Bulgarian":"bg", + "Polish":"pl", + "Hungarian":"hu", + "Hebrew":"he", + "Arabic":"ar", + "Swedish":"sv", + "Norwegian":"no", + "Danish":"da", + "Finnish":"fi", + "Filipino":"fil", + "Esperanto":"eo", + "Hindi":"hi", + "Punjabi":"pa", + "Farsi":"fa", + "Greek":"el", + "Romanian":"ro", + "Albanian":"sq", + "Serbian":"sr", + "Turkish":"tr", + "Czech":"cs", + "Indonesian":"id", + "Croatian":"hr", + "Catalan":"ca", + "Latin":"la", + "Korean":"ko", + "Vietnamese":"vi", + "Thai":"th", + "Devanagari":"hi", + } + class Story: def __init__(self): @@ -33,6 +77,11 @@ class Story: def setMetadata(self, key, value): ## still keeps < < and & self.metadata[key]=conditionalRemoveEntities(value) + if key == "language": + try: + self.metadata['langcode'] = langs[self.metadata[key]] + except: + self.metadata['langcode'] = 'en' def getMetadataRaw(self,key): if self.metadata.has_key(key): @@ -111,7 +160,6 @@ class Story: def setReplace(self,replace): for line in replace.splitlines(): if "=>" in line: - print("line:%s"%line) self.replacements.append(map( lambda x: x.strip(), line.split("=>") )) def commaGroups(s): diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py index de1514bd..84a6f5c5 100644 --- a/fanficdownloader/writers/base_writer.py +++ b/fanficdownloader/writers/base_writer.py @@ -52,6 +52,7 @@ class BaseStoryWriter(Configurable): self.validEntries = [ 'category', 'genre', + 'language', 'characters', 'series', 'status', @@ -80,6 +81,7 @@ class BaseStoryWriter(Configurable): self.titleLabels = { 'category':'Category', 'genre':'Genre', + 'language':'Language', 'status':'Status', 'series':'Series', 'characters':'Characters', diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index acd0dcda..e423556d 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -203,7 +203,10 @@ class EpubWriter(BaseStoryWriter): metadata.appendChild(newTag(contentdom,"dc:contributor",text="fanficdownloader [http://fanficdownloader.googlecode.com]",attrs={"opf:role":"bkp"})) metadata.appendChild(newTag(contentdom,"dc:rights",text="")) - metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + if self.story.getMetadata('langcode') != None: + metadata.appendChild(newTag(contentdom,"dc:language",text=self.story.getMetadata('langcode'))) + else: + metadata.appendChild(newTag(contentdom,"dc:language",text='en')) # published, created, updated, calibre # Leave calling self.story.getMetadataRaw directly in case date format changes. @@ -399,4 +402,4 @@ def newTag(dom,name,attrs=None,text=None): if( text is not None ): tag.appendChild(dom.createTextNode(text)) return tag - + diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 3152ed6e..55eed582 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -41,6 +41,7 @@ formatext_label:File Extension ## Sometimes there are multiple categories and/or genres. category_label:Category genre_label:Genre +language_label:Language characters_label:Characters series_label:Series ## Completed/In-Progress @@ -72,7 +73,7 @@ version_label:FFDL Version ## items to include in the title page ## Empty entries will *not* appear, even if in the list. ## All current formats already include title and author. 
-titlepage_entries: series,category,genre,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description +titlepage_entries: series,category,genre,language,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description ## Try to collect series name and number of this story in series. ## Some sites (ab)use 'series' for reading lists and personal @@ -154,7 +155,7 @@ output_css: [txt] ## Add URLs since there aren't links. -titlepage_entries: series,category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description +titlepage_entries: series,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description ## use \r\n for line endings, the windows convention. text output only. windows_eol: true From baa037254cddf1b0ac545834a41d97b0301807ac Mon Sep 17 00:00:00 2001 From: ia6eia Date: Wed, 15 Feb 2012 04:52:27 +0000 Subject: [PATCH 348/482] Fix for translit in Calibre, also, added language metadata. --- .../adapters/adapter_ficbooknet.py | 426 +++++++++--------- 1 file changed, 220 insertions(+), 206 deletions(-) diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py index 61fd8852..1e95d0f5 100644 --- a/fanficdownloader/adapters/adapter_ficbooknet.py +++ b/fanficdownloader/adapters/adapter_ficbooknet.py @@ -1,206 +1,220 @@ -# -*- coding: utf-8 -*- - -# Copyright 2011 Fanficdownloader team -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import time -import datetime -import logging -import re -import urllib2 -from .. import translit - -from .. import BeautifulSoup as bs -from ..htmlcleanup import stripHTML -from .. import exceptions as exceptions - -from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate - - -def getClass(): - return FicBookNetAdapter - - -class FicBookNetAdapter(BaseSiteAdapter): - - def __init__(self, config, url): - BaseSiteAdapter.__init__(self, config, url) - - self.decode = ["utf8", - "Windows-1252"] # 1252 is a superset of iso-8859-1. - # Most sites that claim to be - # iso-8859-1 (and some that claim to be - # utf8) are really windows-1252. - self.username = "NoneGiven" # if left empty, site doesn't return any message at all. - self.password = "" - self.is_adult=False - - # get storyId from url--url validation guarantees query is only sid=1234 - self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) - - # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId')) - - # Each adapter needs to have a unique site abbreviation. - self.story.setMetadata('siteabbrev','fbn') - - # The date format will vary from site to site. 
- # http://docs.python.org/library/datetime.html#strftime-strptime-behavior - self.dateformat = "%d %m %Y" - - @staticmethod # must be @staticmethod, don't remove it. - def getSiteDomain(): - # The site domain. Does have www here, if it uses it. - return 'www.ficbook.net' - - def getSiteExampleURLs(self): - return "http://"+self.getSiteDomain()+"/readfic/12345" - - def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/readfic/")+r"\d+$" - - ## Getting the chapter list and the meta data, plus 'is adult' checking. - def extractChapterUrlsAndMetadata(self): - url=self.url - logging.debug("URL: "+url) - try: - data = self._fetchUrl(url) - except urllib2.HTTPError, e: - if e.code == 404: - raise exceptions.StoryDoesNotExist(self.url) - else: - raise e - - - # use BeautifulSoup HTML parser to make everything easier to find. - soup = bs.BeautifulSoup(data) - - # Now go hunting for all the meta data and the chapter list. - - table = soup.find('td',{'width':'50%'}) - - ## Title - a = soup.find('h1') - self.story.setMetadata('title',a.string) - logging.debug("Title: (%s)"%self.story.getMetadata('title')) - - # Find authorid and URL from... author url. - a = table.find('a') - self.story.setMetadata('authorId',a.text) # Author's name is unique - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) - self.story.setMetadata('author',a.text) - logging.debug("Author: (%s)"%self.story.getMetadata('author')) - - # Find the chapters: - chapters = soup.find('div', {'class' : 'part_list'}) - if chapters != None: - chapters=chapters.findAll('a', href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+"/\d+#part_content$")) - self.story.setMetadata('numChapters',len(chapters)) - for x in range(0,len(chapters)): - chapter=chapters[x] - churl='http://'+self.host+chapter['href'] - self.chapterUrls.append((stripHTML(chapter),churl)) - if x == 0: - pubdate = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) - if x == len(chapters)-1: - update = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) - else: - self.chapterUrls.append((self.story.getMetadata('title'),url)) - self.story.setMetadata('numChapters',1) - pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span'))) - update=pubdate - - logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) - - if not ',' in pubdate: - pubdate=datetime.date.today().strftime(self.dateformat) - if not ',' in update: - update=datetime.date.today().strftime(self.dateformat) - pubdate=pubdate.split(',')[0] - update=update.split(',')[0] - - fullmon = {"yanvarya":"01", "fievralya":"02", "marta":"03", "aprielya":"04", "maya":"05", - "iyunya":"06","iyulya":"07", "avghusta":"08", "sentyabrya":"09", "oktyabrya":"10", - "noyabrya":"11", "diekabrya":"12" } - for (name,num) in fullmon.items(): - if name in pubdate: - pubdate = pubdate.replace(name,num) - if name in update: - update = update.replace(name,num) - - self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat)) - self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat)) - - pr=soup.find('a', href=re.compile(r'/printfic/\w+')) - pr='http://'+self.host+pr['href'] - pr = bs.BeautifulSoup(self._fetchUrl(pr)) - pr=pr.findAll('div', {'class' : 'part_text'}) - i=0 - for part in pr: - i=i+len(stripHTML(part).split(' ')) - self.story.setMetadata('numWords', str(i)) - - - 
fandoms = table.find('a', href=re.compile(r'/fanfiction/\w+')) - self.story.addToList('category',fandoms.string) - - meta=table.findAll('a', href=re.compile(r'/ratings/')) - i=0 - for m in meta: - if i == 0: - self.story.setMetadata('rating', m.find('b').text) - i=1 - elif i == 1: - if not "," in m.nextSibling: - i=2 - self.story.addToList('genre', m.find('b').text) - elif i == 2: - self.story.addToList('warnings', m.find('b').text) - - - if table.find('span', {'style' : 'color: green'}): - self.story.setMetadata('status', 'Completed') - else: - self.story.setMetadata('status', 'In Progress') - - - tags = table.findAll('b') - for tag in tags: - label = translit.translit(tag.text) - if 'Piersonazhi:' in label: - chars=tag.nextSibling.string.split(', ') - for char in chars: - self.story.addToList('characters',char) - break - - summary=soup.find('span', {'class' : 'urlize'}) - self.story.setMetadata('description', summary.text) - - # grab the text for an individual chapter. - def getChapterText(self, url): - - logging.debug('Getting chapter text from: %s' % url) - - soup = bs.BeautifulStoneSoup(self._fetchUrl(url), - selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. - - soup = soup.find('div', {'class' : 'public_beta'}) - - - if None == soup: - raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - - return utf8FromSoup(soup) +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import datetime +import logging +import re +import urllib2 +from .. import translit + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + + +def getClass(): + return FicBookNetAdapter + + +class FicBookNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','fbn') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %m %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. 
Does have www here, if it uses it. + return 'www.ficbook.net' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/readfic/12345" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/readfic/")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + url=self.url + logging.debug("URL: "+url) + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. + + table = soup.find('td',{'width':'50%'}) + + ## Title + a = soup.find('h1') + self.story.setMetadata('title',a.string) + logging.debug("Title: (%s)"%self.story.getMetadata('title')) + + # Find authorid and URL from... author url. + a = table.find('a') + self.story.setMetadata('authorId',a.text) # Author's name is unique + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.text) + logging.debug("Author: (%s)"%self.story.getMetadata('author')) + + # Find the chapters: + chapters = soup.find('div', {'class' : 'part_list'}) + if chapters != None: + chapters=chapters.findAll('a', href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+"/\d+#part_content$")) + self.story.setMetadata('numChapters',len(chapters)) + for x in range(0,len(chapters)): + chapter=chapters[x] + churl='http://'+self.host+chapter['href'] + self.chapterUrls.append((stripHTML(chapter),churl)) + if x == 0: + pubdate = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) + if x == len(chapters)-1: + update = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + self.story.setMetadata('numChapters',1) + pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span'))) + update=pubdate + + logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + + if not ',' in pubdate: + pubdate=datetime.date.today().strftime(self.dateformat) + if not ',' in update: + update=datetime.date.today().strftime(self.dateformat) + pubdate=pubdate.split(',')[0] + update=update.split(',')[0] + + fullmon = {"yanvarya":"01", "января":"01", + "fievralya":"02", "февраля":"02", + "marta":"03", "марта":"03", + "aprielya":"04", "апреля":"04", + "maya":"05", "мая":"05", + "iyunya":"06", "июня":"06", + "iyulya":"07", "июля":"07", + "avghusta":"08", "августа":"08", + "sentyabrya":"09", "сентября":"09", + "oktyabrya":"10", "октября":"10", + "noyabrya":"11", "ноября":"11", + "diekabrya":"12", "декабря":"12" } + for (name,num) in fullmon.items(): + if name in pubdate: + pubdate = pubdate.replace(name,num) + if name in update: + update = update.replace(name,num) + + self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat)) + self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat)) + self.story.setMetadata('language','Russian') + + pr=soup.find('a', href=re.compile(r'/printfic/\w+')) + pr='http://'+self.host+pr['href'] + pr = bs.BeautifulSoup(self._fetchUrl(pr)) + pr=pr.findAll('div', {'class' : 'part_text'}) + i=0 + for part in pr: + i=i+len(stripHTML(part).split(' ')) + 
self.story.setMetadata('numWords', str(i)) + + i=0 + fandoms = table.findAll('a', href=re.compile(r'/fanfiction/\w+')) + for fandom in fandoms: + self.story.addToList('category',fandom.string) + i=i+1 + if i > 0: + self.story.addToList('genre', 'Кроссовер') + + meta=table.findAll('a', href=re.compile(r'/ratings/')) + i=0 + for m in meta: + if i == 0: + self.story.setMetadata('rating', m.find('b').text) + i=1 + elif i == 1: + if not "," in m.nextSibling: + i=2 + self.story.addToList('genre', m.find('b').text) + elif i == 2: + self.story.addToList('warnings', m.find('b').text) + + + if table.find('span', {'style' : 'color: green'}): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In Progress') + + + tags = table.findAll('b') + for tag in tags: + label = translit.translit(tag.text) + if 'Piersonazhi:' in label or 'Персонажи:' in label: + chars=tag.nextSibling.string.split(', ') + for char in chars: + self.story.addToList('characters',char) + break + + summary=soup.find('span', {'class' : 'urlize'}) + self.story.setMetadata('description', summary.text) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + soup = soup.find('div', {'class' : 'public_beta'}) + + + if None == soup: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(soup) From 56157546be2e88f991f6da81ae5e87f20914f538 Mon Sep 17 00:00:00 2001 From: ia6eia Date: Wed, 15 Feb 2012 08:42:10 +0000 Subject: [PATCH 349/482] Fix for stories that do not allow Public Beta. --- fanficdownloader/adapters/adapter_ficbooknet.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py index 1e95d0f5..852a8def 100644 --- a/fanficdownloader/adapters/adapter_ficbooknet.py +++ b/fanficdownloader/adapters/adapter_ficbooknet.py @@ -168,7 +168,7 @@ class FicBookNetAdapter(BaseSiteAdapter): for fandom in fandoms: self.story.addToList('category',fandom.string) i=i+1 - if i > 0: + if i > 1: self.story.addToList('genre', 'Кроссовер') meta=table.findAll('a', href=re.compile(r'/ratings/')) @@ -211,10 +211,11 @@ class FicBookNetAdapter(BaseSiteAdapter): soup = bs.BeautifulStoneSoup(self._fetchUrl(url), selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. - soup = soup.find('div', {'class' : 'public_beta'}) - + chapter = soup.find('div', {'class' : 'public_beta'}) + if chapter == None: + chapter = soup.find('div', {'class' : 'public_beta_disabled'}) - if None == soup: + if None == chapter: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - return utf8FromSoup(soup) + return utf8FromSoup(chapter) \ No newline at end of file From 6134ba7e7976f9eb00dfe8a2e22fc824487e77ea Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 15 Feb 2012 09:20:36 -0600 Subject: [PATCH 350/482] Turn on ficbook.net and allow chapter URLs in fbn. 
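The 'allow chapter URLs' half of this patch is a one-character change: dropping the trailing '$' anchor from getSiteURLPattern, shown in the diff below. With the anchor gone, /readfic/<id>/<chapter> forms match as well as bare story URLs. A quick standalone illustration using the site domain from the adapter:

    # Effect of dropping the trailing '$' from the ficbook.net URL pattern.
    import re

    base = re.escape("http://www.ficbook.net/readfic/")
    old_pat = base + r"\d+$"    # bare story URLs only
    new_pat = base + r"\d+"     # story or chapter URLs

    story   = "http://www.ficbook.net/readfic/93626"
    chapter = "http://www.ficbook.net/readfic/93626/246417#part_content"

    print(bool(re.match(old_pat, story)), bool(re.match(old_pat, chapter)))   # True False
    print(bool(re.match(new_pat, story)), bool(re.match(new_pat, chapter)))   # True True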
--- calibre-plugin/__init__.py | 2 +- fanficdownloader/adapters/__init__.py | 1 + .../adapters/adapter_ficbooknet.py | 4 ++-- index.html | 24 +++++++++++++++---- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 36ea376b..b4442ae1 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 4, 2) + version = (1, 4, 3) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index abb77ab4..6b841870 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -44,6 +44,7 @@ import adapter_twiwritenet import adapter_whoficcom import adapter_siyecouk import adapter_archiveofourownorg +import adapter_ficbooknet ## This bit of complexity allows adapters to be added by just adding ## importing. It eliminates the long if/else clauses we used to need diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py index 852a8def..c9a98bc7 100644 --- a/fanficdownloader/adapters/adapter_ficbooknet.py +++ b/fanficdownloader/adapters/adapter_ficbooknet.py @@ -70,7 +70,7 @@ class FicBookNetAdapter(BaseSiteAdapter): return "http://"+self.getSiteDomain()+"/readfic/12345" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/readfic/")+r"\d+$" + return re.escape("http://"+self.getSiteDomain()+"/readfic/")+r"\d+" ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -218,4 +218,4 @@ class FicBookNetAdapter(BaseSiteAdapter): if None == chapter: raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) - return utf8FromSoup(chapter) \ No newline at end of file + return utf8FromSoup(chapter) diff --git a/index.html b/index.html index 28a3b515..735472a3 100644 --- a/index.html +++ b/index.html @@ -54,11 +54,18 @@ much easier.
-    Support for Custom CSS
+    New Russian Language Site ficbook.net
+    Thanks to Ida Leter's hard work, we now support ficbook.net, a Russian language fanfiction site.
+    Support for Language, Custom CSS and Replacement of Metadata
+    There's now a 'Language' metadata field that can be filled, if the site supports different languages. Currently, it's only used
+    with fanfiction.net and ficbook.net.
     The CSS included in the HTML and EPUB output formats is now a customizable parameter.
-    Support for Custom Replacement of Metadata
     There's now a customizable parameter to include a list of regular expressions to replace metadata as you see fit.
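The metadata replacement parameter mentioned here is parsed by the setReplace hunk shown earlier in story.py: one pattern=>replacement pair per line. A standalone sketch with made-up example values (and a list comprehension in place of the original map):

    # Parse replacement lines the way story.py's setReplace does.
    replacements = []
    replace = "Sci-Fi=>Science Fiction\nHumour=>Humor"   # hypothetical config
    for line in replace.splitlines():
        if "=>" in line:
            replacements.append([x.strip() for x in line.split("=>")])
    print(replacements)   # -> [['Sci-Fi', 'Science Fiction'], ['Humour', 'Humor']]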

    @@ -240,9 +247,16 @@
    http://archiveofourown.org/works/76366/chapters/101584.
-    A few additional things to know, which will make your life substantially easier:
+    ficbook.net (Russian)
+    Use the URL of the story, or one of its chapters, such as
+    http://ficbook.net/readfic/93626.
+    http://ficbook.net/readfic/93626/246417#part_content.
+    A few additional things to know, which will make your life substantially easier:

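Both example URLs above normalize to the same story: the ficbook.net adapter keys on path segment two, so the chapter suffix and the #part_content fragment are ignored when deriving the storyId. A standalone illustration mirroring the adapter's parsedUrl.path.split('/')[2]:

    # Both URL forms yield the same storyId.
    from urlparse import urlparse   # Python 2, as used throughout this codebase

    for url in ("http://ficbook.net/readfic/93626",
                "http://ficbook.net/readfic/93626/246417#part_content"):
        print(urlparse(url).path.split('/')[2])   # -> 93626 both times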
1. First thing to know: I do not use your Google login and password. In fact, all I know about it is your ID – password From f17d837fc89dbfa8a28267470d08874657a38901 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 20 Feb 2012 17:29:53 -0600 Subject: [PATCH 352/482] Mildly better 'not found' check for ffnet, minor plugin tweak. --- calibre-plugin/config.py | 4 +++- fanficdownloader/adapters/adapter_fanfictionnet.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 8e3d4dc6..50cfb3cd 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -435,7 +435,9 @@ class OtherTab(QWidget): and dynamic[key] is False: dynamic[key] = True info_dialog(self, _('Done'), - _('Confirmation dialogs have all been reset'), show=True) + _('Confirmation dialogs have all been reset'), + show=True, + show_copy_button=False) permitted_values = { 'int' : ['numWords','numChapters'], diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index aa50d420..f307b409 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -83,8 +83,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): if "Unable to locate story with id of " in data: raise exceptions.StoryDoesNotExist(url) - - if "Chapter not found. Please check to see you are not using an outdated url." in data: + + # sometimes "Chapter not found...", sometimes "Chapter text not found..." + if "not found. Please check to see you are not using an outdated url." in data: raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url) try: @@ -102,7 +103,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): chapcount+1) print('=Trying newer chapter: %s' % tryurl) newdata = self._fetchUrl(tryurl) - if "Chapter not found. Please check to see you are not using an outdated url." \ + if "not found. Please check to see you are not using an outdated url." \ not in newdata: print('=======Found newer chapter: %s' % tryurl) soup = bs.BeautifulSoup(newdata) From 1c50d7c918cb4583081e6b01f65ceea096f19fd1 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 21 Feb 2012 14:39:55 -0600 Subject: [PATCH 353/482] Add skip header kludge to fictionalleyorg, bump plugin to 1.4.4. --- calibre-plugin/__init__.py | 2 +- fanficdownloader/adapters/adapter_fictionalleyorg.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index b4442ae1..cfb5710b 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 4, 3) + version = (1, 4, 4) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py index 7eb9e37e..fba44110 100644 --- a/fanficdownloader/adapters/adapter_fictionalleyorg.py +++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py @@ -203,6 +203,10 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter): # our div with poor html inside the story text. data = data.replace('','').replace('','') + # problems with some stories confusing Soup. This is a nasty + # hack, but it works. + data = data[data.index("<body"):] From: Jim Miller Date: Tue, 21 Feb 2012 14:40:06 -0600 Subject: [PATCH 354/482] Added tag calibre-plugin-1.4.4 for changeset 86e8f489854b From f29844fe141bbb1562b9ee39558e6cfc8304e479 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 22 Feb 2012 10:39:05 -0600 Subject: [PATCH 355/482] fanfiction.net is adding CR characters sometimes now. Bump plugin version. --- calibre-plugin/__init__.py | 2 +- fanficdownloader/adapters/adapter_fanfictionnet.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index cfb5710b..d2345878 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 4, 4) + version = (1, 4, 5) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index f307b409..73c8f635 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -74,6 +74,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # use BeautifulSoup HTML parser to make everything easier to find. try: data = self._fetchUrl(url) + #print("\n===================\n%s\n===================\n"%data) soup = bs.BeautifulSoup(data) except urllib2.HTTPError, e: if e.code == 404: @@ -141,7 +142,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): continue if 'var storyid' in script.string: for line in script.string.split('\n'): - m = re.match(r"^ +var ([^ ]+) = '?(.*?)'?;$",line) + m = re.match(r"^ +var ([^ ]+) = '?(.*?)'?;\r?$",line) if m == None : continue var,value = m.groups() # remove javascript escaping from values. From 640c676cc5cde4a41c4f57975a336ba29d1d78a1 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 22 Feb 2012 10:42:21 -0600 Subject: [PATCH 356/482] Change web index.html message. --- index.html | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/index.html b/index.html index 735472a3..f43930d2 100644 --- a/index.html +++ b/index.html @@ -54,25 +54,14 @@ much easier.
+    fanfiction.net fixed
+    fanfiction.net changed their formatting slightly, which broke the downloader for a while. It's fixed now.
     New Russian Language Site ficbook.net
     Thanks to Ida Leter's hard work, we now support ficbook.net, a Russian language fanfiction site.
-    Support for Language, Custom CSS and Replacement of Metadata
-    There's now a 'Language' metadata field that can be filled, if the site supports different languages. Currently, it's only used
-    with fanfiction.net and ficbook.net.
-    The CSS included in the HTML and EPUB output formats is now a customizable parameter.
-    There's now a customizable parameter to include a list of regular expressions to replace metadata as you see fit.
-    Examples of how to use both new features can be found in the plugin forum.
      If you have any problems with this application, please report them in From 71e33ed8eab44c724f56fa9e401ef633c06d7b57 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 22 Feb 2012 10:43:40 -0600 Subject: [PATCH 357/482] Added tag calibre-plugin-1.4.5 for changeset 0157333ee07b --- app.yaml | 46 + calibre-plugin/__init__.py | 90 + calibre-plugin/about.txt | 20 + calibre-plugin/common_utils.py | 447 ++ calibre-plugin/config.py | 557 +++ calibre-plugin/dcsource.py | 30 + calibre-plugin/dialogs.py | 646 +++ calibre-plugin/ffdl_plugin.py | 982 ++++ calibre-plugin/images/icon.png | Bin 0 -> 24649 bytes calibre-plugin/images/icon.xcf | Bin 0 -> 63927 bytes calibre-plugin/jobs.py | 188 + ...mport-name-fanfictiondownloader_plugin.txt | 0 cron.yaml | 10 + css/index.css | 73 + defaults.ini | 363 ++ delete_fic.py | 59 + downloader.py | 220 + editconfig.html | 89 + epubmerge.py | 389 ++ example.ini | 40 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapters/__init__.py | 98 + .../adapters/adapter_adastrafanficcom.py | 227 + .../adapters/adapter_archiveofourownorg.py | 267 + .../adapters/adapter_castlefansorg.py | 308 ++ .../adapters/adapter_fanfictionnet.py | 277 ++ .../adapters/adapter_ficbooknet.py | 221 + .../adapters/adapter_fictionalleyorg.py | 230 + .../adapters/adapter_fictionpresscom.py | 49 + .../adapters/adapter_ficwadcom.py | 216 + .../adapters/adapter_fimfictionnet.py | 175 + .../adapter_harrypotterfanfictioncom.py | 200 + .../adapters/adapter_mediaminerorg.py | 234 + .../adapters/adapter_potionsandsnitchesnet.py | 216 + fanficdownloader/adapters/adapter_siyecouk.py | 298 ++ .../adapters/adapter_tenhawkpresentscom.py | 245 + fanficdownloader/adapters/adapter_test1.py | 198 + .../adapter_thewriterscoffeeshopcom.py | 252 + .../adapters/adapter_tthfanficorg.py | 245 + .../adapters/adapter_twilightednet.py | 250 + .../adapters/adapter_twiwritenet.py | 262 + .../adapters/adapter_whoficcom.py | 231 + fanficdownloader/adapters/base_adapter.py | 282 ++ fanficdownloader/chardet/__init__.py | 26 + fanficdownloader/chardet/big5freq.py | 923 ++++ fanficdownloader/chardet/big5prober.py | 41 + fanficdownloader/chardet/chardistribution.py | 200 + .../chardet/charsetgroupprober.py | 96 + fanficdownloader/chardet/charsetprober.py | 60 + .../chardet/codingstatemachine.py | 56 + fanficdownloader/chardet/constants.py | 47 + fanficdownloader/chardet/escprober.py | 79 + fanficdownloader/chardet/escsm.py | 240 + fanficdownloader/chardet/eucjpprober.py | 85 + fanficdownloader/chardet/euckrfreq.py | 594 +++ fanficdownloader/chardet/euckrprober.py | 41 + fanficdownloader/chardet/euctwfreq.py | 426 ++ fanficdownloader/chardet/euctwprober.py | 41 + fanficdownloader/chardet/gb2312freq.py | 471 ++ fanficdownloader/chardet/gb2312prober.py | 41 + fanficdownloader/chardet/hebrewprober.py | 269 + fanficdownloader/chardet/jisfreq.py | 567 +++ fanficdownloader/chardet/jpcntx.py | 210 + .../chardet/langbulgarianmodel.py | 228 + fanficdownloader/chardet/langcyrillicmodel.py | 329 ++ fanficdownloader/chardet/langgreekmodel.py | 225 + fanficdownloader/chardet/langhebrewmodel.py | 201 + .../chardet/langhungarianmodel.py | 225 + fanficdownloader/chardet/langthaimodel.py | 200 + fanficdownloader/chardet/latin1prober.py | 136 + fanficdownloader/chardet/mbcharsetprober.py | 82 + fanficdownloader/chardet/mbcsgroupprober.py | 50 + fanficdownloader/chardet/mbcssm.py | 514 ++ fanficdownloader/chardet/sbcharsetprober.py | 106 + fanficdownloader/chardet/sbcsgroupprober.py | 64 
+ fanficdownloader/chardet/sjisprober.py | 85 + fanficdownloader/chardet/test.py | 20 + fanficdownloader/chardet/universaldetector.py | 154 + fanficdownloader/chardet/utf8prober.py | 76 + fanficdownloader/configurable.py | 64 + fanficdownloader/exceptions.py | 65 + fanficdownloader/gziphttp.py | 38 + fanficdownloader/html.py | 126 + fanficdownloader/html2text.py | 452 ++ fanficdownloader/htmlcleanup.py | 463 ++ fanficdownloader/mobi.py | 384 ++ fanficdownloader/story.py | 171 + fanficdownloader/translit.py | 57 + fanficdownloader/writers/__init__.py | 38 + fanficdownloader/writers/base_writer.py | 289 ++ fanficdownloader/writers/writer_epub.py | 405 ++ fanficdownloader/writers/writer_html.py | 103 + fanficdownloader/writers/writer_mobi.py | 191 + fanficdownloader/writers/writer_txt.py | 157 + ffstorage.py | 39 + index-ajax.html | 109 + index.html | 300 ++ index.yaml | 33 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ login.html | 110 + main.py | 570 +++ makeplugin.py | 38 + makezip.py | 54 + plugin-defaults.ini | 343 ++ plugin-example.ini | 74 + queue.yaml | 7 + readme.txt | 14 + recent.html | 85 + settings.py | 25 + simplejson/__init__.py | 318 ++ simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/encoder.py | 440 ++ simplejson/scanner.py | 65 + simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes status.html | 94 + utils/__init__.py | 1 + utils/remover.py | 109 + utils/tally.py | 64 + 139 files changed, 32229 insertions(+) create mode 100644 app.yaml create mode 100644 calibre-plugin/__init__.py create mode 100644 calibre-plugin/about.txt create mode 100644 calibre-plugin/common_utils.py create mode 100644 calibre-plugin/config.py create mode 100644 calibre-plugin/dcsource.py create mode 100644 calibre-plugin/dialogs.py create mode 100644 calibre-plugin/ffdl_plugin.py create mode 100644 calibre-plugin/images/icon.png create mode 100644 calibre-plugin/images/icon.xcf create mode 100644 calibre-plugin/jobs.py create mode 100644 calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 defaults.ini create mode 100644 delete_fic.py create mode 100644 downloader.py create mode 100644 editconfig.html create mode 100644 epubmerge.py create mode 100644 example.ini create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapters/__init__.py create mode 100644 fanficdownloader/adapters/adapter_adastrafanficcom.py create mode 100644 fanficdownloader/adapters/adapter_archiveofourownorg.py create mode 100644 fanficdownloader/adapters/adapter_castlefansorg.py create mode 100644 fanficdownloader/adapters/adapter_fanfictionnet.py create mode 100644 
fanficdownloader/adapters/adapter_ficbooknet.py create mode 100644 fanficdownloader/adapters/adapter_fictionalleyorg.py create mode 100644 fanficdownloader/adapters/adapter_fictionpresscom.py create mode 100644 fanficdownloader/adapters/adapter_ficwadcom.py create mode 100644 fanficdownloader/adapters/adapter_fimfictionnet.py create mode 100644 fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py create mode 100644 fanficdownloader/adapters/adapter_mediaminerorg.py create mode 100644 fanficdownloader/adapters/adapter_potionsandsnitchesnet.py create mode 100644 fanficdownloader/adapters/adapter_siyecouk.py create mode 100644 fanficdownloader/adapters/adapter_tenhawkpresentscom.py create mode 100644 fanficdownloader/adapters/adapter_test1.py create mode 100644 fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py create mode 100644 fanficdownloader/adapters/adapter_tthfanficorg.py create mode 100644 fanficdownloader/adapters/adapter_twilightednet.py create mode 100644 fanficdownloader/adapters/adapter_twiwritenet.py create mode 100644 fanficdownloader/adapters/adapter_whoficcom.py create mode 100644 fanficdownloader/adapters/base_adapter.py create mode 100644 fanficdownloader/chardet/__init__.py create mode 100644 fanficdownloader/chardet/big5freq.py create mode 100644 fanficdownloader/chardet/big5prober.py create mode 100644 fanficdownloader/chardet/chardistribution.py create mode 100644 fanficdownloader/chardet/charsetgroupprober.py create mode 100644 fanficdownloader/chardet/charsetprober.py create mode 100644 fanficdownloader/chardet/codingstatemachine.py create mode 100644 fanficdownloader/chardet/constants.py create mode 100644 fanficdownloader/chardet/escprober.py create mode 100644 fanficdownloader/chardet/escsm.py create mode 100644 fanficdownloader/chardet/eucjpprober.py create mode 100644 fanficdownloader/chardet/euckrfreq.py create mode 100644 fanficdownloader/chardet/euckrprober.py create mode 100644 fanficdownloader/chardet/euctwfreq.py create mode 100644 fanficdownloader/chardet/euctwprober.py create mode 100644 fanficdownloader/chardet/gb2312freq.py create mode 100644 fanficdownloader/chardet/gb2312prober.py create mode 100644 fanficdownloader/chardet/hebrewprober.py create mode 100644 fanficdownloader/chardet/jisfreq.py create mode 100644 fanficdownloader/chardet/jpcntx.py create mode 100644 fanficdownloader/chardet/langbulgarianmodel.py create mode 100644 fanficdownloader/chardet/langcyrillicmodel.py create mode 100644 fanficdownloader/chardet/langgreekmodel.py create mode 100644 fanficdownloader/chardet/langhebrewmodel.py create mode 100644 fanficdownloader/chardet/langhungarianmodel.py create mode 100644 fanficdownloader/chardet/langthaimodel.py create mode 100644 fanficdownloader/chardet/latin1prober.py create mode 100644 fanficdownloader/chardet/mbcharsetprober.py create mode 100644 fanficdownloader/chardet/mbcsgroupprober.py create mode 100644 fanficdownloader/chardet/mbcssm.py create mode 100644 fanficdownloader/chardet/sbcharsetprober.py create mode 100644 fanficdownloader/chardet/sbcsgroupprober.py create mode 100644 fanficdownloader/chardet/sjisprober.py create mode 100644 fanficdownloader/chardet/test.py create mode 100644 fanficdownloader/chardet/universaldetector.py create mode 100644 fanficdownloader/chardet/utf8prober.py create mode 100644 fanficdownloader/configurable.py create mode 100644 fanficdownloader/exceptions.py create mode 100644 fanficdownloader/gziphttp.py create mode 100644 fanficdownloader/html.py create mode 100644 
fanficdownloader/html2text.py create mode 100644 fanficdownloader/htmlcleanup.py create mode 100644 fanficdownloader/mobi.py create mode 100644 fanficdownloader/story.py create mode 100644 fanficdownloader/translit.py create mode 100644 fanficdownloader/writers/__init__.py create mode 100644 fanficdownloader/writers/base_writer.py create mode 100644 fanficdownloader/writers/writer_epub.py create mode 100644 fanficdownloader/writers/writer_html.py create mode 100644 fanficdownloader/writers/writer_mobi.py create mode 100644 fanficdownloader/writers/writer_txt.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 login.html create mode 100644 main.py create mode 100644 makeplugin.py create mode 100644 makezip.py create mode 100644 plugin-defaults.ini create mode 100644 plugin-example.ini create mode 100644 queue.yaml create mode 100644 readme.txt create mode 100644 recent.html create mode 100644 settings.py create mode 100644 simplejson/__init__.py create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/encoder.py create mode 100644 simplejson/scanner.py create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 status.html create mode 100644 utils/__init__.py create mode 100644 utils/remover.py create mode 100644 utils/tally.py diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..1fd9e75b --- /dev/null +++ b/app.yaml @@ -0,0 +1,46 @@ +# ffd-retief-hrd fanfictiondownloader +application: fanfictiondownloader +version: 4-3-2 +runtime: python27 +api_version: 1 +threadsafe: true + +handlers: + +- url: /r3m0v3r.* + script: utils.remover.app + login: admin + +- url: /tally.* + script: utils.tally.app + login: admin + +- url: /fdownloadtask + script: main.app + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /favicon\.ico + static_files: static/favicon.ico + upload: static/favicon\.ico + +- url: /.* + script: main.app + +builtins: +- datastore_admin: on + +libraries: +- name: django + version: "1.2" + +- name: PIL + version: "1.1.7" diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py new file mode 100644 index 00000000..d2345878 --- /dev/null +++ b/calibre-plugin/__init__.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + 
print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +# The class that all Interface Action plugin wrappers must inherit from +from calibre.customize import InterfaceActionBase + +## Apparently the name for this class doesn't matter--it was still +## 'demo' for the first few versions. +class FanFictionDownLoaderBase(InterfaceActionBase): + ''' + This class is a simple wrapper that provides information about the + actual plugin class. The actual interface plugin class is called + InterfacePlugin and is defined in the ffdl_plugin.py file, as + specified in the actual_plugin field below. + + The reason for having two classes is that it allows the command line + calibre utilities to run without needing to load the GUI libraries. + ''' + name = 'FanFictionDownLoader' + description = 'UI plugin to download FanFiction stories from various sites.' + supported_platforms = ['windows', 'osx', 'linux'] + author = 'Jim Miller' + version = (1, 4, 5) + minimum_calibre_version = (0, 8, 30) + + #: This field defines the GUI plugin class that contains all the code + #: that actually does something. Its format is module_path:class_name + #: The specified class must be defined in the specified module. + actual_plugin = 'calibre_plugins.fanfictiondownloader_plugin.ffdl_plugin:FanFictionDownLoaderPlugin' + + def is_customizable(self): + ''' + This method must return True to enable customization via + Preferences->Plugins + ''' + return True + + def config_widget(self): + ''' + Implement this method and :meth:`save_settings` in your plugin to + use a custom configuration dialog. + + This method, if implemented, must return a QWidget. The widget can have + an optional method validate() that takes no arguments and is called + immediately after the user clicks OK. Changes are applied if and only + if the method returns True. + + If for some reason you cannot perform the configuration at this time, + return a tuple of two strings (message, details), these will be + displayed as a warning dialog to the user and the process will be + aborted. + + The base class implementation of this method raises NotImplementedError + so by default no user configuration is possible. + ''' + # It is important to put this import statement here rather than at the + # top of the module as importing the config class will also cause the + # GUI libraries to be loaded, which we do not want when using calibre + # from the command line + from calibre_plugins.fanfictiondownloader_plugin.config import ConfigWidget + return ConfigWidget(self.actual_plugin_) + + def save_settings(self, config_widget): + ''' + Save the settings specified by the user with config_widget. + + :param config_widget: The widget returned by :meth:`config_widget`. + ''' + config_widget.save_settings() + + # Apply the changes + ac = self.actual_plugin_ + if ac is not None: + ac.apply_settings() + +# For testing, run from command line with this: +# calibre-debug -e __init__.py +# +if __name__ == '__main__': + from PyQt4.Qt import QApplication + from calibre.gui2.preferences import test_widget + app = QApplication([]) + test_widget('Advanced', 'Plugins') diff --git a/calibre-plugin/about.txt b/calibre-plugin/about.txt new file mode 100644 index 00000000..b63c1acf --- /dev/null +++ b/calibre-plugin/about.txt @@ -0,0 +1,20 @@ +
+Created by Jim Miller, borrowing heavily from Grant Drake's
+'Reading List',
+'Extract ISBN' and
+'Count Pages'
+plugins.
+
+Calibre officially distributes plugins from the mobileread.com forum site.
+The official distro channel for this plugin is there: FanFictionDownLoader
+
+I also monitor the
+general users
+group for the downloader. That covers the web application and CLI, too.
      + +The source for this plugin is available +here. diff --git a/calibre-plugin/common_utils.py b/calibre-plugin/common_utils.py new file mode 100644 index 00000000..19e8697e --- /dev/null +++ b/calibre-plugin/common_utils.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +import os +from PyQt4 import QtGui +from PyQt4.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime) +from calibre.constants import iswindows +from calibre.gui2 import gprefs, error_dialog, UNDEFINED_QDATETIME +from calibre.gui2.actions import menu_action_unique_name +from calibre.gui2.keyboard import ShortcutConfig +from calibre.utils.config import config_dir +from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE + +# Global definition of our plugin name. Used for common functions that require this. +plugin_name = None +# Global definition of our plugin resources. Used to share between the xxxAction and xxxBase +# classes if you need any zip images to be displayed on the configuration dialog. +plugin_icon_resources = {} + + +def set_plugin_icon_resources(name, resources): + ''' + Set our global store of plugin name and icon resources for sharing between + the InterfaceAction class which reads them and the ConfigWidget + if needed for use on the customization dialog for this plugin. + ''' + global plugin_icon_resources, plugin_name + plugin_name = name + plugin_icon_resources = resources + + +def get_icon(icon_name): + ''' + Retrieve a QIcon for the named image from the zip file if it exists, + or if not then from Calibre's image cache. 
+ ''' + if icon_name: + pixmap = get_pixmap(icon_name) + if pixmap is None: + # Look in Calibre's cache for the icon + return QIcon(I(icon_name)) + else: + return QIcon(pixmap) + return QIcon() + + +def get_pixmap(icon_name): + ''' + Retrieve a QPixmap for the named image + Any icons belonging to the plugin must be prefixed with 'images/' + ''' + global plugin_icon_resources, plugin_name + + if not icon_name.startswith('images/'): + # We know this is definitely not an icon belonging to this plugin + pixmap = QPixmap() + pixmap.load(I(icon_name)) + return pixmap + + # Check to see whether the icon exists as a Calibre resource + # This will enable skinning if the user stores icons within a folder like: + # ...\AppData\Roaming\calibre\resources\images\Plugin Name\ + if plugin_name: + local_images_dir = get_local_images_dir(plugin_name) + local_image_path = os.path.join(local_images_dir, icon_name.replace('images/', '')) + if os.path.exists(local_image_path): + pixmap = QPixmap() + pixmap.load(local_image_path) + return pixmap + + # As we did not find an icon elsewhere, look within our zip resources + if icon_name in plugin_icon_resources: + pixmap = QPixmap() + pixmap.loadFromData(plugin_icon_resources[icon_name]) + return pixmap + return None + + +def get_local_images_dir(subfolder=None): + ''' + Returns a path to the user's local resources/images folder + If a subfolder name parameter is specified, appends this to the path + ''' + images_dir = os.path.join(config_dir, 'resources/images') + if subfolder: + images_dir = os.path.join(images_dir, subfolder) + if iswindows: + images_dir = os.path.normpath(images_dir) + return images_dir + + +def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=(), triggered=None, is_checked=None): + ''' + Create a menu action with the specified criteria and action + Note that if no shortcut is specified, will not appear in Preferences->Keyboard + This method should only be used for actions which either have no shortcuts, + or register their menus only once. Use create_menu_action_unique for all else. 
+ ''' + if shortcut is not None: + if len(shortcut) == 0: + shortcut = () + else: + shortcut = _(shortcut) + ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut), + attr=menu_text) + if image: + ac.setIcon(get_icon(image)) + if triggered is not None: + ac.triggered.connect(triggered) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + + parent_menu.addAction(ac) + return ac + + +def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ''' + Create a menu action with the specified criteria and action, using the new + InterfaceAction.create_menu_action() function which ensures that regardless of + whether a shortcut is specified it will appear in Preferences->Keyboard + ''' + orig_shortcut = shortcut + kb = ia.gui.keyboard + if unique_name is None: + unique_name = menu_text + if not shortcut == False: + full_unique_name = menu_action_unique_name(ia, unique_name) + if full_unique_name in kb.shortcuts: + shortcut = False + else: + if shortcut is not None and not shortcut == False: + if len(shortcut) == 0: + shortcut = None + else: + shortcut = _(shortcut) + + if shortcut_name is None: + shortcut_name = menu_text.replace('&','') + + ac = ia.create_menu_action(parent_menu, unique_name, menu_text, icon=None, shortcut=shortcut, + description=tooltip, triggered=triggered, shortcut_name=shortcut_name) + if shortcut == False and not orig_shortcut == False: + if ac.calibre_shortcut_unique_name in ia.gui.keyboard.shortcuts: + kb.replace_action(ac.calibre_shortcut_unique_name, ac) + if image: + ac.setIcon(get_icon(image)) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + return ac + + +def swap_author_names(author): + if author.find(',') == -1: + return author + name_parts = author.strip().partition(',') + return name_parts[2].strip() + ' ' + name_parts[0] + + +def get_library_uuid(db): + try: + library_uuid = db.library_id + except: + library_uuid = '' + return library_uuid + + +class ImageLabel(QLabel): + + def __init__(self, parent, icon_name, size=16): + QLabel.__init__(self, parent) + pixmap = get_pixmap(icon_name) + self.setPixmap(pixmap) + self.setMaximumSize(size, size) + self.setScaledContents(True) + + +class ImageTitleLayout(QHBoxLayout): + ''' + A reusable layout widget displaying an image followed by a title + ''' + def __init__(self, parent, icon_name, title): + QHBoxLayout.__init__(self) + title_image_label = QLabel(parent) + pixmap = get_pixmap(icon_name) + if pixmap is None: + pixmap = get_pixmap('library.png') + # error_dialog(parent, _('Restart required'), + # _('You must restart Calibre before using this plugin!'), show=True) + else: + title_image_label.setPixmap(pixmap) + title_image_label.setMaximumSize(32, 32) + title_image_label.setScaledContents(True) + self.addWidget(title_image_label) + + title_font = QFont() + title_font.setPointSize(16) + shelf_label = QLabel(title, parent) + shelf_label.setFont(title_font) + self.addWidget(shelf_label) + self.insertStretch(-1) + + +class SizePersistedDialog(QDialog): + ''' + This dialog is a base class for any dialogs that want their size/position + restored when they are next opened. 
+ ''' + def __init__(self, parent, unique_pref_name): + QDialog.__init__(self, parent) + self.unique_pref_name = unique_pref_name + self.geom = gprefs.get(unique_pref_name, None) + self.finished.connect(self.dialog_closing) + + def resize_dialog(self): + if self.geom is None: + self.resize(self.sizeHint()) + else: + self.restoreGeometry(self.geom) + + def dialog_closing(self, result): + geom = bytearray(self.saveGeometry()) + gprefs[self.unique_pref_name] = geom + + +class ReadOnlyTableWidgetItem(QTableWidgetItem): + + def __init__(self, text): + if text is None: + text = '' + QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class RatingTableWidgetItem(QTableWidgetItem): + + def __init__(self, rating, is_read_only=False): + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, rating) + if is_read_only: + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class DateTableWidgetItem(QTableWidgetItem): + + def __init__(self, date_read, is_read_only=False, default_to_today=False): + if date_read == UNDEFINED_DATE and default_to_today: + date_read = now() + if is_read_only: + QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + else: + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, QDateTime(date_read)) + + +class NoWheelComboBox(QComboBox): + + def wheelEvent (self, event): + # Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid + event.ignore() + + +class CheckableTableWidgetItem(QTableWidgetItem): + + def __init__(self, checked=False, is_tristate=False): + QTableWidgetItem.__init__(self, '') + self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled )) + if is_tristate: + self.setFlags(self.flags() | Qt.ItemIsTristate) + if checked: + self.setCheckState(Qt.Checked) + else: + if is_tristate and checked is None: + self.setCheckState(Qt.PartiallyChecked) + else: + self.setCheckState(Qt.Unchecked) + + def get_boolean_value(self): + ''' + Return a boolean value indicating whether checkbox is checked + If this is a tristate checkbox, a partially checked value is returned as None + ''' + if self.checkState() == Qt.PartiallyChecked: + return None + else: + return self.checkState() == Qt.Checked + + +class TextIconWidgetItem(QTableWidgetItem): + + def __init__(self, text, icon): + QTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem): + + def __init__(self, text, icon): + ReadOnlyTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyLineEdit(QLineEdit): + + def __init__(self, text, parent): + if text is None: + text = '' + QLineEdit.__init__(self, text, parent) + self.setEnabled(False) + + +class KeyValueComboBox(QComboBox): + + def __init__(self, parent, values, selected_key): + QComboBox.__init__(self, parent) + self.values = values + self.populate_combo(selected_key) + + def populate_combo(self, selected_key): + self.clear() + selected_idx = idx = -1 + for key, value in self.values.iteritems(): + idx = idx + 1 + self.addItem(value) + if key == selected_key: + selected_idx = idx + self.setCurrentIndex(selected_idx) + + def selected_key(self): + for key, value in self.values.iteritems(): + if value == unicode(self.currentText()).strip(): 
+ return key + + +class CustomColumnComboBox(QComboBox): + + def __init__(self, parent, custom_columns, selected_column, initial_items=['']): + QComboBox.__init__(self, parent) + self.populate_combo(custom_columns, selected_column, initial_items) + + def populate_combo(self, custom_columns, selected_column, initial_items=['']): + self.clear() + self.column_names = initial_items + if len(initial_items) > 0: + self.addItems(initial_items) + selected_idx = 0 + for idx, value in enumerate(initial_items): + if value == selected_column: + selected_idx = idx + for key in sorted(custom_columns.keys()): + self.column_names.append(key) + self.addItem('%s (%s)'%(key, custom_columns[key]['name'])) + if key == selected_column: + selected_idx = len(self.column_names) - 1 + self.setCurrentIndex(selected_idx) + + def get_selected_column(self): + return self.column_names[self.currentIndex()] + + +class KeyboardConfigDialog(SizePersistedDialog): + ''' + This dialog is used to allow editing of keyboard shortcuts. + ''' + def __init__(self, gui, group_name): + SizePersistedDialog.__init__(self, gui, 'Keyboard shortcut dialog') + self.gui = gui + self.setWindowTitle('Keyboard shortcuts') + layout = QVBoxLayout(self) + self.setLayout(layout) + + self.keyboard_widget = ShortcutConfig(self) + layout.addWidget(self.keyboard_widget) + self.group_name = group_name + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.commit) + button_box.rejected.connect(self.reject) + layout.addWidget(button_box) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.initialize() + + def initialize(self): + self.keyboard_widget.initialize(self.gui.keyboard) + self.keyboard_widget.highlight_group(self.group_name) + + def commit(self): + self.keyboard_widget.commit() + self.accept() + + +class DateDelegate(QStyledItemDelegate): + ''' + Delegate for dates. Because this delegate stores the + format as an instance variable, a new instance must be created for each + column. This differs from all the other delegates. 
+ ''' + def __init__(self, parent): + QStyledItemDelegate.__init__(self, parent) + self.format = 'dd MMM yyyy' + + def displayText(self, val, locale): + d = val.toDateTime() + if d <= UNDEFINED_QDATETIME: + return '' + return format_date(qt_to_dt(d, as_utc=False), self.format) + + def createEditor(self, parent, option, index): + qde = QStyledItemDelegate.createEditor(self, parent, option, index) + qde.setDisplayFormat(self.format) + qde.setMinimumDateTime(UNDEFINED_QDATETIME) + qde.setSpecialValueText(_('Undefined')) + qde.setCalendarPopup(True) + return qde + + def setEditorData(self, editor, index): + val = index.model().data(index, Qt.DisplayRole).toDateTime() + if val is None or val == UNDEFINED_QDATETIME: + val = now() + editor.setDateTime(val) + + def setModelData(self, editor, model, index): + val = editor.dateTime() + if val <= UNDEFINED_QDATETIME: + model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole) + else: + model.setData(index, QDateTime(val), Qt.EditRole) diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py new file mode 100644 index 00000000..50cfb3cd --- /dev/null +++ b/calibre-plugin/config.py @@ -0,0 +1,557 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import traceback, copy + +from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QFont, + QTextEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QVariant) + +from calibre.gui2 import dynamic, info_dialog +from calibre.utils.config import JSONConfig +from calibre.gui2.ui import get_gui + +from calibre_plugins.fanfictiondownloader_plugin.dialogs \ + import (SKIP, ADDNEW, UPDATE, UPDATEALWAYS, OVERWRITE, OVERWRITEALWAYS, + CALIBREONLY,collision_order) + +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import ( get_library_uuid, KeyboardConfigDialog ) + +from calibre.gui2.complete import MultiCompleteLineEdit + +# This is where all preferences for this plugin will be stored +# Remember that this name (i.e. plugins/fanfictiondownloader_plugin) is also +# in a global namespace, so make it as unique as possible. +# You should always prefix your config file name with plugins/, +# so as to ensure you dont accidentally clobber a calibre config file +all_prefs = JSONConfig('plugins/fanfictiondownloader_plugin') + +# Set defaults used by all. Library specific settings continue to +# take from here. +all_prefs.defaults['personal.ini'] = get_resources('plugin-example.ini') +all_prefs.defaults['updatemeta'] = True +all_prefs.defaults['keeptags'] = False +all_prefs.defaults['urlsfromclip'] = True +all_prefs.defaults['updatedefault'] = True +all_prefs.defaults['fileform'] = 'epub' +all_prefs.defaults['collision'] = OVERWRITE +all_prefs.defaults['deleteotherforms'] = False +all_prefs.defaults['send_lists'] = '' +all_prefs.defaults['read_lists'] = '' +all_prefs.defaults['addtolists'] = False +all_prefs.defaults['addtoreadlists'] = False +all_prefs.defaults['addtolistsonread'] = False +all_prefs.defaults['custom_cols'] = {} + +# The list of settings to copy from all_prefs or the previous library +# when config is called for the first time on a library. 
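+# (Illustrative sketch: all_prefs then holds the default keys at top level
+# plus one dict per library uuid, e.g.
+#   {'fileform':'epub', ..., '<lib-1 uuid>': {...}, '<lib-2 uuid>': {...}}.)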
+copylist = ['personal.ini', + 'updatemeta', + 'keeptags', + 'urlsfromclip', + 'updatedefault', + 'fileform', + 'collision', + 'deleteotherforms'] + +# fake out so I don't have to change the prefs calls anywhere. The +# Java programmer in me is offended by op-overloading, but it's very +# tidy. +class PrefsFacade(): + def __init__(self,all_prefs): + self.all_prefs = all_prefs + self.lastlibid = None + + def _get_copylist_prefs(self,frompref): + return filter( lambda x : x[0] in copylist, frompref.items() ) + + def _get_prefs(self): + libraryid = get_library_uuid(get_gui().current_db) + if libraryid not in self.all_prefs: + if self.lastlibid == None: + self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs)) + else: + self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs[self.lastlibid])) + self.lastlibid = libraryid + + return self.all_prefs[libraryid] + + def _save_prefs(self,prefs): + libraryid = get_library_uuid(get_gui().current_db) + self.all_prefs[libraryid] = prefs + + def __getitem__(self,k): + prefs = self._get_prefs() + if k not in prefs: + # pulls from all_prefs.defaults automatically if not set + # in all_prefs + return self.all_prefs[k] + return prefs[k] + + def __setitem__(self,k,v): + prefs = self._get_prefs() + prefs[k]=v + self._save_prefs(prefs) + + # to be avoided--can cause unexpected results as possibly ancient + # all_pref settings may be pulled. + def __delitem__(self,k): + prefs = self._get_prefs() + del prefs[k] + self._save_prefs(prefs) + +prefs = PrefsFacade(all_prefs) + +class ConfigWidget(QWidget): + + def __init__(self, plugin_action): + QWidget.__init__(self) + self.plugin_action = plugin_action + + self.l = QVBoxLayout() + self.setLayout(self.l) + + tab_widget = QTabWidget(self) + self.l.addWidget(tab_widget) + + self.basic_tab = BasicTab(self, plugin_action) + tab_widget.addTab(self.basic_tab, 'Basic') + + self.personalini_tab = PersonalIniTab(self, plugin_action) + tab_widget.addTab(self.personalini_tab, 'personal.ini') + + self.list_tab = ListTab(self, plugin_action) + tab_widget.addTab(self.list_tab, 'Reading Lists') + if 'Reading List' not in plugin_action.gui.iactions: + self.list_tab.setEnabled(False) + + self.columns_tab = ColumnsTab(self, plugin_action) + tab_widget.addTab(self.columns_tab, 'Custom Columns') + + self.other_tab = OtherTab(self, plugin_action) + tab_widget.addTab(self.other_tab, 'Other') + + + def save_settings(self): + + # basic + prefs['fileform'] = unicode(self.basic_tab.fileform.currentText()) + prefs['collision'] = unicode(self.basic_tab.collision.currentText()) + prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked() + prefs['keeptags'] = self.basic_tab.keeptags.isChecked() + prefs['urlsfromclip'] = self.basic_tab.urlsfromclip.isChecked() + prefs['updatedefault'] = self.basic_tab.updatedefault.isChecked() + prefs['deleteotherforms'] = self.basic_tab.deleteotherforms.isChecked() + + if self.list_tab: + # lists + prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.list_tab.send_lists_box.text()).split(',')))) + prefs['read_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.list_tab.read_lists_box.text()).split(',')))) + # print("send_lists: %s"%prefs['send_lists']) + # print("read_lists: %s"%prefs['read_lists']) + prefs['addtolists'] = self.list_tab.addtolists.isChecked() + prefs['addtoreadlists'] = self.list_tab.addtoreadlists.isChecked() + prefs['addtolistsonread'] = 
self.list_tab.addtolistsonread.isChecked() + + # personal.ini + ini = unicode(self.personalini_tab.ini.toPlainText()) + if ini: + prefs['personal.ini'] = ini + else: + # if they've removed everything, reset to default. + prefs['personal.ini'] = get_resources('plugin-example.ini') + + # Custom Columns tab + colsmap = {} + for (col,combo) in self.columns_tab.custcol_dropdowns.iteritems(): + val = unicode(combo.itemData(combo.currentIndex()).toString()) + if val != 'none': + colsmap[col] = val + #print("colsmap[%s]:%s"%(col,colsmap[col])) + prefs['custom_cols'] = colsmap + + def edit_shortcuts(self): + self.save_settings() + # Force the menus to be rebuilt immediately, so we have all our actions registered + self.plugin_action.rebuild_menus() + d = KeyboardConfigDialog(self.plugin_action.gui, self.plugin_action.action_spec[0]) + if d.exec_() == d.Accepted: + self.plugin_action.gui.keyboard.finalize() + +class BasicTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('These settings control the basic features of the plugin--downloading FanFiction.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + horz = QHBoxLayout() + label = QLabel('Default Output &Format:') + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + horz = QHBoxLayout() + label = QLabel('Default If Story Already Exists?') + label.setToolTip("What to do if there's already an existing story with the same title and author.") + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip('Overwrite will replace the existing story. 
Add New will create a new story with the same title and author.') + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox('Default Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update title, author, URL, tags, custom columns, etc for story in Calibre from web site.') + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + self.keeptags = QCheckBox('Keep Existing Tags when Updating Metadata?',self) + self.keeptags.setToolTip('Existing tags will be kept and any new tags added.\nCompleted and In-Progress tags will be still be updated, if known.\nLast Updated tags will be updated if lastupdate in include_subject_tags.') + self.keeptags.setChecked(prefs['keeptags']) + self.l.addWidget(self.keeptags) + + self.urlsfromclip = QCheckBox('Take URLs from Clipboard?',self) + self.urlsfromclip.setToolTip('Prefill URLs from valid URLs in Clipboard when Adding New.') + self.urlsfromclip.setChecked(prefs['urlsfromclip']) + self.l.addWidget(self.urlsfromclip) + + self.updatedefault = QCheckBox('Default to Update when books selected?',self) + self.updatedefault.setToolTip('The top FanFictionDownLoader plugin button will start Update if\n'+ + 'books are selected. If unchecked, it will always bring up \'Add New\'.') + self.updatedefault.setChecked(prefs['updatedefault']) + self.l.addWidget(self.updatedefault) + + self.deleteotherforms = QCheckBox('Delete other existing formats?',self) + self.deleteotherforms.setToolTip('Check this to automatically delete all other ebook formats when updating an existing book.\nHandy if you have both a Nook(epub) and Kindle(mobi), for example.') + self.deleteotherforms.setChecked(prefs['deleteotherforms']) + self.l.addWidget(self.deleteotherforms) + + self.l.insertStretch(-1) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class PersonalIniTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('These settings provide more detailed control over what metadata will be displayed inside the ebook as well as let you set is_adult and user/password for different sites.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.label = QLabel('personal.ini:') + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + try: + self.ini.setFont(QFont("Courier", + self.plugin_action.gui.font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(prefs['personal.ini']) + self.l.addWidget(self.ini) + + self.defaults = QPushButton('View Defaults', self) + self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.") + self.defaults.clicked.connect(self.show_defaults) + self.l.addWidget(self.defaults) + + # self.l.insertStretch(-1) + # let edit box fill the space. 
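+        # (The ini uses ConfigParser syntax; an illustrative entry:
+        #   [www.fictionpress.com]
+        #   username:YourName
+        #   password:yourpassword
+        #   is_adult:true
+        # -- see the shipped plugin-example.ini for the real sample.)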
+ + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class ShowDefaultsIniDialog(QDialog): + + def __init__(self, icon, text, parent=None): + QDialog.__init__(self, parent) + self.resize(600, 500) + self.l = QVBoxLayout() + self.setLayout(self.l) + self.label = QLabel("Plugin Defaults (Read-Only)") + self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + self.setWindowTitle(_('Plugin Defaults')) + self.setWindowIcon(icon) + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + self.ini.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + try: + self.ini.setFont(QFont("Courier", + get_gui().font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(text) + self.ini.setReadOnly(True) + self.l.addWidget(self.ini) + + self.ok_button = QPushButton('OK', self) + self.ok_button.clicked.connect(self.hide) + self.l.addWidget(self.ok_button) + +class ListTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + rl_plugin = plugin_action.gui.iactions['Reading List'] + reading_lists = rl_plugin.get_list_names() + except KeyError: + reading_lists= [] + + label = QLabel('These settings provide integration with the Reading List Plugin. Reading List can automatically send to devices and change custom columns. You have to create and configure the lists in Reading List to be useful.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.addtolists = QCheckBox('Add new/updated stories to "Send to Device" Reading List(s).',self) + self.addtolists.setToolTip('Automatically add new/updated stories to these lists in the Reading List plugin.') + self.addtolists.setChecked(prefs['addtolists']) + self.l.addWidget(self.addtolists) + + horz = QHBoxLayout() + label = QLabel('"Send to Device" Reading Lists') + label.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + horz.addWidget(label) + self.send_lists_box = MultiCompleteLineEdit(self) + self.send_lists_box.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + self.send_lists_box.update_items_cache(reading_lists) + self.send_lists_box.setText(prefs['send_lists']) + horz.addWidget(self.send_lists_box) + self.l.addLayout(horz) + + self.addtoreadlists = QCheckBox('Add new/updated stories to "To Read" Reading List(s).',self) + self.addtoreadlists.setToolTip('Automatically add new/updated stories to these lists in the Reading List plugin.\nAlso offers menu option to remove stories from the "To Read" lists.') + self.addtoreadlists.setChecked(prefs['addtoreadlists']) + self.l.addWidget(self.addtoreadlists) + + horz = QHBoxLayout() + label = QLabel('"To Read" Reading Lists') + label.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + horz.addWidget(label) + self.read_lists_box = MultiCompleteLineEdit(self) + self.read_lists_box.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + self.read_lists_box.update_items_cache(reading_lists) + self.read_lists_box.setText(prefs['read_lists']) + horz.addWidget(self.read_lists_box) + 
self.l.addLayout(horz) + + self.addtolistsonread = QCheckBox('Add stories back to "Send to Device" Reading List(s) when marked "Read".',self) + self.addtolistsonread.setToolTip('Menu option to remove from "To Read" lists will also add stories back to "Send to Device" Reading List(s)') + self.addtolistsonread.setChecked(prefs['addtolistsonread']) + self.l.addWidget(self.addtolistsonread) + + self.l.insertStretch(-1) + +class OtherTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFictionDownLoader confirmation dialogs back again.") + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + keyboard_shortcuts_button = QPushButton('Keyboard shortcuts...', self) + keyboard_shortcuts_button.setToolTip(_( + 'Edit the keyboard shortcuts associated with this plugin')) + keyboard_shortcuts_button.clicked.connect(parent_dialog.edit_shortcuts) + self.l.addWidget(keyboard_shortcuts_button) + + reset_confirmation_button = QPushButton(_('Reset disabled &confirmation dialogs'), self) + reset_confirmation_button.setToolTip(_( + 'Reset all show me again dialogs for the FanFictionDownLoader plugin')) + reset_confirmation_button.clicked.connect(self.reset_dialogs) + self.l.addWidget(reset_confirmation_button) + + self.l.insertStretch(-1) + + def reset_dialogs(self): + for key in dynamic.keys(): + if key.startswith('fanfictiondownloader_') and key.endswith('_again') \ + and dynamic[key] is False: + dynamic[key] = True + info_dialog(self, _('Done'), + _('Confirmation dialogs have all been reset'), + show=True, + show_copy_button=False) + +permitted_values = { + 'int' : ['numWords','numChapters'], + 'float' : ['numWords','numChapters'], + 'bool' : ['status-C','status-I'], + 'datetime' : ['datePublished', 'dateUpdated', 'dateCreated'], + 'series' : ['series'], + 'enumeration' : ['category', + 'genre', + 'language', + 'series', + 'characters', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'warnings', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'authorId', + 'extratags', + 'title', + 'storyUrl', + 'description', + 'author', + 'authorUrl', + 'formatname' + #,'formatext' # not useful information. + #,'siteabbrev' + #,'version' + ] + } +# no point copying the whole list. 
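+# ('text' and 'comments' columns accept any of the enumeration values, so the
+# two assignments below just alias the same list object.)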
+permitted_values['text'] = permitted_values['enumeration'] +permitted_values['comments'] = permitted_values['enumeration'] + +titleLabels = { + 'category':'Category', + 'genre':'Genre', + 'language':'Language', + 'status':'Status', + 'status-C':'Status:Completed', + 'status-I':'Status:In-Progress', + 'series':'Series', + 'characters':'Characters', + 'datePublished':'Published', + 'dateUpdated':'Updated', + 'dateCreated':'Packaged', + 'rating':'Rating', + 'warnings':'Warnings', + 'numChapters':'Chapters', + 'numWords':'Words', + 'site':'Site', + 'storyId':'Story ID', + 'authorId':'Author ID', + 'extratags':'Extra Tags', + 'title':'Title', + 'storyUrl':'Story URL', + 'description':'Summary', + 'author':'Author', + 'authorUrl':'Author URL', + 'formatname':'File Format', + 'formatext':'File Extension', + 'siteabbrev':'Site Abbrev', + 'version':'FFDL Version' + } + +class ColumnsTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel("If you have custom columns defined, they will be listed below. Choose a metadata value type to fill your columns automatically.") + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.custcol_dropdowns = {} + + custom_columns = self.plugin_action.gui.library_view.model().custom_columns + + for key, column in custom_columns.iteritems(): + + if column['datatype'] in permitted_values: + # print("\n============== %s ===========\n"%key) + # for (k,v) in column.iteritems(): + # print("column['%s'] => %s"%(k,v)) + horz = QHBoxLayout() + label = QLabel('%s(%s)'%(column['name'],key)) + label.setToolTip("Update this %s column with..."%column['datatype']) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.addItem('',QVariant('none')) + for md in permitted_values[column['datatype']]: + dropdown.addItem(titleLabels[md],QVariant(md)) + self.custcol_dropdowns[key] = dropdown + if key in prefs['custom_cols']: + dropdown.setCurrentIndex(dropdown.findData(QVariant(prefs['custom_cols'][key]))) + if column['datatype'] == 'enumeration': + dropdown.setToolTip("Metadata values valid for this type of column.\nValues that aren't valid for this enumeration column will be ignored.") + else: + dropdown.setToolTip("Metadata values valid for this type of column.") + + horz.addWidget(dropdown) + self.l.addLayout(horz) + + self.l.insertStretch(-1) + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) diff --git a/calibre-plugin/dcsource.py b/calibre-plugin/dcsource.py new file mode 100644 index 00000000..03910417 --- /dev/null +++ b/calibre-plugin/dcsource.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Jim Miller' +__docformat__ = 'restructuredtext en' + +from zipfile import ZipFile + +from xml.dom.minidom import parseString + +def get_dcsource(inputio): + epub = ZipFile(inputio, 'r') + + ## Find the .opf file. 
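+    ## (META-INF/container.xml names it; typical contents look like, e.g.:
+    ##  <rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>)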
+ container = epub.read("META-INF/container.xml") + containerdom = parseString(container) + rootfilenodelist = containerdom.getElementsByTagName("rootfile") + rootfilename = rootfilenodelist[0].getAttribute("full-path") + + metadom = parseString(epub.read(rootfilename)) + firstmetadom = metadom.getElementsByTagName("metadata")[0] + try: + source=firstmetadom.getElementsByTagName("dc:source")[0].firstChild.data.encode("utf-8") + except: + source=None + + return source diff --git a/calibre-plugin/dialogs.py b/calibre-plugin/dialogs.py new file mode 100644 index 00000000..5ab82df6 --- /dev/null +++ b/calibre-plugin/dialogs.py @@ -0,0 +1,646 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import traceback + +from PyQt4 import QtGui +from PyQt4.Qt import (QDialog, QTableWidget, QMessageBox, QVBoxLayout, QHBoxLayout, QGridLayout, + QPushButton, QProgressDialog, QString, QLabel, QCheckBox, QIcon, QTextCursor, + QTextEdit, QLineEdit, QInputDialog, QComboBox, QClipboard, QVariant, + QProgressDialog, QTimer, QDialogButtonBox, QPixmap, Qt,QAbstractItemView ) + +from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog +from calibre.gui2.dialogs.confirm_delete import confirm + +from calibre import confirm_config_name +from calibre.gui2 import dynamic + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters,writers,exceptions +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import (ReadOnlyTableWidgetItem, ReadOnlyTextIconWidgetItem, SizePersistedDialog, + ImageTitleLayout, get_icon) + +SKIP='Skip' +ADDNEW='Add New Book' +UPDATE='Update EPUB if New Chapters' +UPDATEALWAYS='Update EPUB Always' +OVERWRITE='Overwrite if Newer' +OVERWRITEALWAYS='Overwrite Always' +CALIBREONLY='Update Calibre Metadata Only' +collision_order=[SKIP, + ADDNEW, + UPDATE, + UPDATEALWAYS, + OVERWRITE, + OVERWRITEALWAYS, + CALIBREONLY,] + +class NotGoingToDownload(Exception): + def __init__(self,error,icon='dialog_error.png'): + self.error=error + self.icon=icon + + def __str__(self): + return self.error + +class DroppableQTextEdit(QTextEdit): + def __init__(self,parent): + QTextEdit.__init__(self,parent) + + def canInsertFromMimeData(self, source): + if source.hasUrls(): + return True; + else: + return QTextEdit.canInsertFromMimeData(self,source) + + def insertFromMimeData(self, source): + if source.hasUrls(): + for u in source.urls(): + self.append(u.toString()) + else: + return QTextEdit.insertFromMimeData(self, source) + +class AddNewDialog(SizePersistedDialog): + + def __init__(self, gui, prefs, icon, url_list_text): + SizePersistedDialog.__init__(self, gui, 'FanFictionDownLoader plugin:add new dialog') + self.gui = gui + + self.setMinimumWidth(300) + self.l = QVBoxLayout() + self.setLayout(self.l) + + self.setWindowTitle('FanFictionDownLoader') + self.setWindowIcon(icon) + + self.l.addWidget(QLabel('Story URL(s), one per line:')) + self.url = DroppableQTextEdit(self) + self.url.setToolTip('URLs for stories, one per line.\nWill take URLs from clipboard, but only valid URLs.') + self.url.setLineWrapMode(QTextEdit.NoWrap) + self.url.setText(url_list_text) + self.l.addWidget(self.url) + + horz = QHBoxLayout() + label = QLabel('Output &Format:') + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + 
self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + horz = QHBoxLayout() + label = QLabel('If Story Already Exists?') + label.setToolTip("What to do if there's already an existing story with the same title and author.") + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip(OVERWRITE+' will replace the existing story.\n'+ + # UPDATE+' will download new chapters only and add to existing EPUB.\n'+ + # ADDNEW+' will create a new story with the same title and author.\n'+ + # SKIP+' will not download existing stories.\n'+ + # CALIBREONLY+' will not download stories, but will update Calibre metadata.') + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox('Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update metadata for story in Calibre from web site?') + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + self.l.addWidget(button_box) + + if url_list_text: + button_box.button(QDialogButtonBox.Ok).setFocus() + + # restore saved size. + self.resize_dialog() + #self.resize(self.sizeHint()) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def get_ffdl_options(self): + return { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + } + + def get_urlstext(self): + return unicode(self.url.toPlainText()) + +class UserPassDialog(QDialog): + ''' + Need to collect User/Pass for some sites. 
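+    Shown when an adapter raises FailedToLogin; the caller copies user/passwd
+    back onto the adapter and retries (see get_metadata_for_book in ffdl_plugin.py).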
+ ''' + def __init__(self, gui, site): + QDialog.__init__(self, gui) + self.gui = gui + self.status=False + self.setWindowTitle('User/Password') + + self.l = QGridLayout() + self.setLayout(self.l) + + self.l.addWidget(QLabel("%s requires you to login to download this story."%site),0,0,1,2) + + self.l.addWidget(QLabel("User:"),1,0) + self.user = QLineEdit(self) + self.l.addWidget(self.user,1,1) + + self.l.addWidget(QLabel("Password:"),2,0) + self.passwd = QLineEdit(self) + self.passwd.setEchoMode(QLineEdit.Password) + self.l.addWidget(self.passwd,2,1) + + self.ok_button = QPushButton('OK', self) + self.ok_button.clicked.connect(self.ok) + self.l.addWidget(self.ok_button,3,0) + + self.cancel_button = QPushButton('Cancel', self) + self.cancel_button.clicked.connect(self.cancel) + self.l.addWidget(self.cancel_button,3,1) + + self.resize(self.sizeHint()) + + def ok(self): + self.status=True + self.hide() + + def cancel(self): + self.status=False + self.hide() + +class LoopProgressDialog(QProgressDialog): + ''' + ProgressDialog displayed while fetching metadata for each story. + ''' + def __init__(self, gui, + book_list, + foreach_function, + finish_function, + init_label="Fetching metadata for stories...", + win_title="Downloading metadata for stories", + status_prefix="Fetched metadata for"): + QProgressDialog.__init__(self, + init_label, + QString(), 0, len(book_list), gui) + self.setWindowTitle(win_title) + self.setMinimumWidth(500) + self.gui = gui + self.book_list = book_list + self.foreach_function = foreach_function + self.finish_function = finish_function + self.status_prefix = status_prefix + self.i = 0 + + ## self.do_loop does QTimer.singleShot on self.do_loop also. + ## A weird way to do a loop, but that was the example I had. + QTimer.singleShot(0, self.do_loop) + self.exec_() + + def updateStatus(self): + self.setLabelText("%s %d of %d"%(self.status_prefix,self.i+1,len(self.book_list))) + self.setValue(self.i+1) + print(self.labelText()) + + def do_loop(self): + + if self.i == 0: + self.setValue(0) + + book = self.book_list[self.i] + try: + ## collision spec passed into getadapter by partial from ffdl_plugin + ## no retval only if it exists, but collision is SKIP + self.foreach_function(book) + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + print("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + self.updateStatus() + self.i += 1 + + if self.i >= len(self.book_list) or self.wasCanceled(): + return self.do_when_finished() + else: + QTimer.singleShot(0, self.do_loop) + + def do_when_finished(self): + self.hide() + self.gui = None + # Queues a job to process these books in the background. 
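+        # (finish_function is whatever callable the caller supplied, e.g.
+        # ffdl_plugin passes partial(self.start_download_list, options=options).)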
+ self.finish_function(self.book_list) + +class AboutDialog(QDialog): + + def __init__(self, parent, icon, text): + QDialog.__init__(self, parent) + self.resize(400, 250) + self.l = QGridLayout() + self.setLayout(self.l) + self.logo = QLabel() + self.logo.setMaximumWidth(110) + self.logo.setPixmap(QPixmap(icon.pixmap(100,100))) + self.label = QLabel(text) + self.label.setOpenExternalLinks(True) + self.label.setWordWrap(True) + self.setWindowTitle(_('About FanFictionDownLoader')) + self.setWindowIcon(icon) + self.l.addWidget(self.logo, 0, 0) + self.l.addWidget(self.label, 0, 1) + self.bb = QDialogButtonBox(self) + b = self.bb.addButton(_('OK'), self.bb.AcceptRole) + b.setDefault(True) + self.l.addWidget(self.bb, 2, 0, 1, -1) + self.bb.accepted.connect(self.accept) + +class IconWidgetItem(ReadOnlyTextIconWidgetItem): + def __init__(self, text, icon, sort_key): + ReadOnlyTextIconWidgetItem.__init__(self, text, icon) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class AuthorTableWidgetItem(ReadOnlyTableWidgetItem): + def __init__(self, text, sort_key): + ReadOnlyTableWidgetItem.__init__(self, text) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class UpdateExistingDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + save_size_name='fanfictiondownloader_plugin:update list dialog'): + SizePersistedDialog.__init__(self, gui, save_size_name) + self.gui = gui + + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + books_layout = QHBoxLayout() + layout.addLayout(books_layout) + + self.books_table = StoryListTableWidget(self) + books_layout.addWidget(self.books_table) + + button_layout = QVBoxLayout() + books_layout.addLayout(button_layout) + # self.move_up_button = QtGui.QToolButton(self) + # self.move_up_button.setToolTip('Move selected books up the list') + # self.move_up_button.setIcon(QIcon(I('arrow-up.png'))) + # self.move_up_button.clicked.connect(self.books_table.move_rows_up) + # button_layout.addWidget(self.move_up_button) + spacerItem = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem) + self.remove_button = QtGui.QToolButton(self) + self.remove_button.setToolTip('Remove selected books from the list') + self.remove_button.setIcon(get_icon('list_remove.png')) + self.remove_button.clicked.connect(self.remove_from_list) + button_layout.addWidget(self.remove_button) + spacerItem1 = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem1) + # self.move_down_button = QtGui.QToolButton(self) + # self.move_down_button.setToolTip('Move selected books down the list') + # self.move_down_button.setIcon(QIcon(I('arrow-down.png'))) + # self.move_down_button.clicked.connect(self.books_table.move_rows_down) + # button_layout.addWidget(self.move_down_button) + + options_layout = QHBoxLayout() + + label = QLabel('Output &Format:') + options_layout.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + 
self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + options_layout.addWidget(self.fileform) + + label = QLabel('Update Mode:') + label.setToolTip("What sort of update to perform. May set default from plugin configuration.") + options_layout.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip('Overwrite will replace the existing story. Add New will create a new story with the same title and author.') + label.setBuddy(self.collision) + options_layout.addWidget(self.collision) + + self.updatemeta = QCheckBox('Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update metadata for story in Calibre from web site? May set default from plugin configuration.') + self.updatemeta.setChecked(prefs['updatemeta']) + options_layout.addWidget(self.updatemeta) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if o not in [ADDNEW,SKIP] and \ + (self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]): + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def remove_from_list(self): + self.books_table.remove_selected_rows() + + def get_books(self): + return self.books_table.get_books() + + def get_ffdl_options(self): + return { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + } + +def display_story_list(gui, header, prefs, icon, books, + label_text='', + save_size_name='fanfictiondownloader_plugin:display list dialog', + offer_skip=False): + all_good = True + for b in books: + if not b['good']: + all_good=False + break + + ## + if all_good and not dynamic.get(confirm_config_name(save_size_name), True): + return True + pass + ## fake accept? 
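+    ## (i.e. when every book is good and the user has unchecked 'Show this
+    ## again?', skip the dialog and act as if it had been accepted.  The
+    ## 'pass' above is unreachable and could be dropped.)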
+ d = DisplayStoryListDialog(gui, header, prefs, icon, books, + label_text, + save_size_name, + offer_skip and all_good) + d.exec_() + return d.result() == d.Accepted + +class DisplayStoryListDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + label_text='', + save_size_name='fanfictiondownloader_plugin:display list dialog', + offer_skip=False): + SizePersistedDialog.__init__(self, gui, save_size_name) + self.name = save_size_name + self.gui = gui + + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + + self.books_table = StoryListTableWidget(self) + layout.addWidget(self.books_table) + + options_layout = QHBoxLayout() + self.label = QLabel(label_text) + #self.label.setOpenExternalLinks(True) + #self.label.setWordWrap(True) + options_layout.addWidget(self.label) + + if offer_skip: + spacerItem1 = QtGui.QSpacerItem(2, 4, QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Minimum) + options_layout.addItem(spacerItem1) + self.again = QCheckBox('Show this again?',self) + self.again.setChecked(True) + self.again.stateChanged.connect(self.toggle) + self.again.setToolTip('Uncheck to skip review and update stories immediately when no problems.') + options_layout.addWidget(self.again) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def get_books(self): + return self.books_table.get_books() + + def toggle(self, *args): + dynamic[confirm_config_name(self.name)] = self.again.isChecked() + + + +class StoryListTableWidget(QTableWidget): + + def __init__(self, parent): + QTableWidget.__init__(self, parent) + self.setSelectionBehavior(QAbstractItemView.SelectRows) + + def populate_table(self, books): + self.clear() + self.setAlternatingRowColors(True) + self.setRowCount(len(books)) + header_labels = ['','Title', 'Author', 'URL', 'Comment'] + self.setColumnCount(len(header_labels)) + self.setHorizontalHeaderLabels(header_labels) + self.horizontalHeader().setStretchLastSection(True) + #self.verticalHeader().setDefaultSectionSize(24) + self.verticalHeader().hide() + + self.books={} + for row, book in enumerate(books): + self.populate_table_row(row, book) + self.books[row] = book + + # turning True breaks up/down. Do we need either sorting or up/down? 
+        self.setSortingEnabled(True)
+        self.resizeColumnsToContents()
+        self.setMinimumColumnWidth(1, 100)
+        self.setMinimumColumnWidth(2, 100)
+        self.setMinimumColumnWidth(3, 100)
+        self.setMinimumSize(300, 0)
+        # if len(books) > 0:
+        #     self.selectRow(0)
+        self.sortItems(1)
+        self.sortItems(0)
+
+    def setMinimumColumnWidth(self, col, minimum):
+        if self.columnWidth(col) < minimum:
+            self.setColumnWidth(col, minimum)
+
+    def populate_table_row(self, row, book):
+        if book['good']:
+            icon = get_icon('ok.png')
+            val = 0
+        else:
+            icon = get_icon('minus.png')
+            val = 1
+        if 'icon' in book:
+            icon = get_icon(book['icon'])
+
+        status_cell = IconWidgetItem(None,icon,val)
+        status_cell.setData(Qt.UserRole, QVariant(val))
+        self.setItem(row, 0, status_cell)
+
+        title_cell = ReadOnlyTableWidgetItem(book['title'])
+        title_cell.setData(Qt.UserRole, QVariant(row))
+        self.setItem(row, 1, title_cell)
+
+        self.setItem(row, 2, AuthorTableWidgetItem(book['author'], book['author_sort']))
+
+        url_cell = ReadOnlyTableWidgetItem(book['url'])
+        #url_cell.setData(Qt.UserRole, QVariant(book['url']))
+        self.setItem(row, 3, url_cell)
+
+        comment_cell = ReadOnlyTableWidgetItem(book['comment'])
+        #comment_cell.setData(Qt.UserRole, QVariant(book))
+        self.setItem(row, 4, comment_cell)
+
+    def get_books(self):
+        books = []
+        #print("=========================\nbooks:%s"%self.books)
+        for row in range(self.rowCount()):
+            rnum = self.item(row, 1).data(Qt.UserRole).toPyObject()
+            book = self.books[rnum]
+            books.append(book)
+        return books
+
+    def remove_selected_rows(self):
+        self.setFocus()
+        rows = self.selectionModel().selectedRows()
+        if len(rows) == 0:
+            return
+        message = '<p>Are you sure you want to remove this book from the list?'
+        if len(rows) > 1:
+            message = '<p>Are you sure you want to remove the selected %d books from the list?'%len(rows)
+        if not confirm(message,'fanfictiondownloader_delete_item', self):
+            return
+        first_sel_row = self.currentRow()
+        for selrow in reversed(rows):
+            self.removeRow(selrow.row())
+        if first_sel_row < self.rowCount():
+            self.select_and_scroll_to_row(first_sel_row)
+        elif self.rowCount() > 0:
+            self.select_and_scroll_to_row(first_sel_row - 1)
+
+    def select_and_scroll_to_row(self, row):
+        self.selectRow(row)
+        self.scrollToItem(self.currentItem())
+
+    def move_rows_up(self):
+        self.setFocus()
+        rows = self.selectionModel().selectedRows()
+        if len(rows) == 0:
+            return
+        first_sel_row = rows[0].row()
+        if first_sel_row <= 0:
+            return
+        # Workaround for strange selection bug in Qt which "alters" the selection
+        # in certain circumstances which meant move down only worked properly "once"
+        selrows = []
+        for row in rows:
+            selrows.append(row.row())
+        selrows.sort()
+        for selrow in selrows:
+            self.swap_row_widgets(selrow - 1, selrow + 1)
+        scroll_to_row = first_sel_row - 1
+        if scroll_to_row > 0:
+            scroll_to_row = scroll_to_row - 1
+        self.scrollToItem(self.item(scroll_to_row, 0))
+
+    def move_rows_down(self):
+        self.setFocus()
+        rows = self.selectionModel().selectedRows()
+        if len(rows) == 0:
+            return
+        last_sel_row = rows[-1].row()
+        if last_sel_row == self.rowCount() - 1:
+            return
+        # Workaround for strange selection bug in Qt which "alters" the selection
+        # in certain circumstances which meant move down only worked properly "once"
+        selrows = []
+        for row in rows:
+            selrows.append(row.row())
+        selrows.sort()
+        for selrow in reversed(selrows):
+            self.swap_row_widgets(selrow + 2, selrow)
+        scroll_to_row = last_sel_row + 1
+        if scroll_to_row < self.rowCount() - 1:
+            scroll_to_row = scroll_to_row + 1
+        self.scrollToItem(self.item(scroll_to_row, 0))
+
+    def swap_row_widgets(self, src_row, dest_row):
+        self.blockSignals(True)
+        self.insertRow(dest_row)
+        for col in range(0, self.columnCount()):
+            self.setItem(dest_row, col, self.takeItem(src_row, col))
+        self.removeRow(src_row)
+        self.blockSignals(False)
diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py
new file mode 100644
index 00000000..19003405
--- /dev/null
+++ b/calibre-plugin/ffdl_plugin.py
@@ -0,0 +1,982 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Jim Miller'
+__docformat__ = 'restructuredtext en'
+
+import time, os, copy, threading
+from ConfigParser import SafeConfigParser
+from StringIO import StringIO
+from functools import partial
+from datetime import datetime
+
+from PyQt4.Qt import (QApplication, QMenu, QToolButton)
+
+from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory, remove_dir
+from calibre.ebooks.metadata import MetaInformation, authors_to_string
+from calibre.ebooks.metadata.meta import get_metadata
+from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog
+from calibre.gui2.dialogs.message_box import ViewLog
+from calibre.gui2.dialogs.confirm_delete import confirm
+from calibre.utils.date import local_tz
+
+# The class that all interface action plugins must inherit from
+from calibre.gui2.actions import InterfaceAction
+
+from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin_icon_resources, get_icon,
+                                                                      create_menu_action_unique, get_library_uuid)
+
+from 
calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions +from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge +from calibre_plugins.fanfictiondownloader_plugin.dcsource import get_dcsource + +from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values) +from calibre_plugins.fanfictiondownloader_plugin.dialogs import ( + AddNewDialog, UpdateExistingDialog, display_story_list, DisplayStoryListDialog, + LoopProgressDialog, UserPassDialog, AboutDialog, + OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, + NotGoingToDownload ) + +# because calibre immediately transforms html into zip and don't want +# to have an 'if html'. db.has_format is cool with the case mismatch, +# but if I'm doing it anyway... +formmapping = { + 'epub':'EPUB', + 'mobi':'MOBI', + 'html':'ZIP', + 'txt':'TXT' + } + +PLUGIN_ICONS = ['images/icon.png'] + +class FanFictionDownLoaderPlugin(InterfaceAction): + + name = 'FanFictionDownLoader' + + # Declare the main action associated with this plugin + # The keyboard shortcut can be None if you dont want to use a keyboard + # shortcut. Remember that currently calibre has no central management for + # keyboard shortcuts, so try to use an unusual/unused shortcut. + # (text, icon_path, tooltip, keyboard shortcut) + # icon_path isn't in the zip--icon loaded below. + action_spec = (name, None, + 'Download FanFiction stories from various web sites', ()) + # None for keyboard shortcut doesn't allow shortcut. () does, there just isn't one yet + + action_type = 'global' + # make button menu drop down only + #popup_type = QToolButton.InstantPopup + + def genesis(self): + + # This method is called once per plugin, do initial setup here + + # Read the plugin icons and store for potential sharing with the config widget + icon_resources = self.load_resources(PLUGIN_ICONS) + set_plugin_icon_resources(self.name, icon_resources) + + base = self.interface_action_base_plugin + self.version = base.name+" v%d.%d.%d"%base.version + + # Set the icon for this interface action + # The get_icons function is a builtin function defined for all your + # plugin code. It loads icons from the plugin zip file. It returns + # QIcon objects, if you want the actual data, use the analogous + # get_resources builtin function. + + # Note that if you are loading more than one icon, for performance, you + # should pass a list of names to get_icons. In this case, get_icons + # will return a dictionary mapping names to QIcons. Names that + # are not found in the zip file will result in null QIcons. + icon = get_icon('images/icon.png') + + # The qaction is automatically created from the action_spec defined + # above + self.qaction.setIcon(icon) + + # Call function when plugin triggered. + self.qaction.triggered.connect(self.plugin_button) + + # Assign our menu to this action + self.menu = QMenu(self.gui) + self.old_actions_unique_map = {} + self.qaction.setMenu(self.menu) + self.menu.aboutToShow.connect(self.about_to_show_menu) + + self.menus_lock = threading.RLock() + + def initialization_complete(self): + # otherwise configured hot keys won't work until the menu's + # been displayed once. 
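+        # (rebuild_menus registers every menu action's shortcut with
+        # self.gui.keyboard and calls finalize(), so shortcuts work from
+        # startup.)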
+ self.rebuild_menus() + + def about_to_show_menu(self): + self.rebuild_menus() + + def library_changed(self, db): + # We need to reset our menus after switching libraries + self.rebuild_menus() + + def rebuild_menus(self): + with self.menus_lock: + # Show the config dialog + # The config dialog can also be shown from within + # Preferences->Plugins, which is why the do_user_config + # method is defined on the base plugin class + do_user_config = self.interface_action_base_plugin.do_user_config + self.menu.clear() + self.actions_unique_map = {} + self.add_action = self.create_menu_item_ex(self.menu, '&Add New from URL(s)', image='plus.png', + unique_name='Add New FanFiction Book(s) from URL(s)', + shortcut_name='Add New FanFiction Book(s) from URL(s)', + triggered=self.add_dialog ) + + self.update_action = self.create_menu_item_ex(self.menu, '&Update Existing FanFiction Book(s)', image='plusplus.png', + unique_name='Update Existing FanFiction Book(s)', + shortcut_name='Update Existing FanFiction Book(s)', + triggered=self.update_existing) + + if 'Reading List' in self.gui.iactions and (prefs['addtolists'] or prefs['addtoreadlists']) : + self.menu.addSeparator() + addmenutxt, rmmenutxt = None, None + if prefs['addtolists'] and prefs['addtoreadlists'] : + addmenutxt = 'Add to "To Read" and "Send to Device" Lists' + if prefs['addtolistsonread']: + rmmenutxt = 'Remove from "To Read" and add to "Send to Device" Lists' + else: + rmmenutxt = 'Remove from "To Read" Lists' + elif prefs['addtolists'] : + addmenutxt = 'Add Selected to "Send to Device" Lists' + elif prefs['addtoreadlists']: + addmenutxt = 'Add to "To Read" Lists' + rmmenutxt = 'Remove from "To Read" Lists' + + if addmenutxt: + self.add_send_action = self.create_menu_item_ex(self.menu, addmenutxt, image='plusplus.png', + unique_name=addmenutxt, + shortcut_name=addmenutxt, + triggered=partial(self.update_lists,add=True)) + + if rmmenutxt: + self.add_remove_action = self.create_menu_item_ex(self.menu, rmmenutxt, image='minusminus.png', + unique_name=rmmenutxt, + shortcut_name=rmmenutxt, + triggered=partial(self.update_lists,add=False)) + + # try: + # self.add_send_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # except: + # pass + # try: + # self.add_remove_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # except: + # pass + + self.menu.addSeparator() + self.get_list_action = self.create_menu_item_ex(self.menu, 'Get URLs from Selected Books', image='bookmarks.png', + unique_name='Get URLs from Selected Books', + shortcut_name='Get URLs from Selected Books', + triggered=self.get_list_urls) + + self.menu.addSeparator() + self.config_action = create_menu_action_unique(self, self.menu, '&Configure Plugin', shortcut=False, + image= 'config.png', + unique_name='Configure FanFictionDownLoader', + shortcut_name='Configure FanFictionDownLoader', + triggered=partial(do_user_config,parent=self.gui)) + + self.config_action = create_menu_action_unique(self, self.menu, '&About Plugin', shortcut=False, + image= 'images/icon.png', + unique_name='About FanFictionDownLoader', + shortcut_name='About FanFictionDownLoader', + triggered=self.about) + + # self.update_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # self.get_list_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + + # Before we finalize, make sure we delete any actions for menus that are no longer displayed + for menu_id, unique_name in self.old_actions_unique_map.iteritems(): + if menu_id not in 
self.actions_unique_map: + self.gui.keyboard.unregister_shortcut(unique_name) + self.old_actions_unique_map = self.actions_unique_map + self.gui.keyboard.finalize() + + def about(self): + # Get the about text from a file inside the plugin zip file + # The get_resources function is a builtin function defined for all your + # plugin code. It loads files from the plugin zip file. It returns + # the bytes from the specified file. + # + # Note that if you are loading more than one file, for performance, you + # should pass a list of names to get_resources. In this case, + # get_resources will return a dictionary mapping names to bytes. Names that + # are not found in the zip file will not be in the returned dictionary. + + text = get_resources('about.txt') + AboutDialog(self.gui,self.qaction.icon(),self.version + text).exec_() + + def create_menu_item_ex(self, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ac = create_menu_action_unique(self, parent_menu, menu_text, image, tooltip, + shortcut, triggered, is_checked, shortcut_name, unique_name) + self.actions_unique_map[ac.calibre_shortcut_unique_name] = ac.calibre_shortcut_unique_name + return ac + + def plugin_button(self): + if len(self.gui.library_view.get_selected_ids()) > 0 and prefs['updatedefault']: + self.update_existing() + else: + self.add_dialog() + + def update_lists(self,add=True): + if len(self.gui.library_view.get_selected_ids()) > 0 and \ + (prefs['addtolists'] or prefs['addtoreadlists']) : + self._update_reading_lists(self.gui.library_view.get_selected_ids(),add) + #self.gui.library_view.model().refresh_ids(self.gui.library_view.get_selected_ids()) + + def get_list_urls(self): + if len(self.gui.library_view.get_selected_ids()) > 0: + book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) + + LoopProgressDialog(self.gui, + book_list, + partial(self._get_story_url_for_list, db=self.gui.current_db), + self._finish_get_list_urls, + init_label="Collecting URLs for stories...", + win_title="Get URLs for stories", + status_prefix="URL retrieved") + + def _get_story_url_for_list(self,book,db=None): + book['url'] = self._get_story_url(db,book['calibre_id']) + if book['url'] == None: + book['good']=False + else: + book['good']=True + + def _finish_get_list_urls(self, book_list): + url_list = [ x['url'] for x in book_list if x['good'] ] + if url_list: + d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui) + d.setWindowIcon(get_icon('bookmarks.png')) + d.exec_() + else: + info_dialog(self.gui, _('List of URLs'), + _('No URLs found in selected books.'), + show=True, + show_copy_button=False) + + def add_dialog(self): + + #print("add_dialog()") + + url_list = self.get_urls_clip() + url_list_text = "\n".join(url_list) + + # self.gui is the main calibre GUI. It acts as the gateway to access + # all the elements of the calibre user interface, it should also be the + # parent of the dialog + # AddNewDialog just collects URLs, format and presents buttons. 
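+        # (e.g. with 'urlsfromclip' set and two story URLs on the clipboard,
+        # the dialog opens pre-filled with both, one per line.)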
+ d = AddNewDialog(self.gui, + prefs, + self.qaction.icon(), + url_list_text, + ) + d.exec_() + if d.result() != d.Accepted: + return + + url_list = get_url_list(d.get_urlstext()) + add_books = self._convert_urls_to_books(url_list) + #print("add_books:%s"%add_books) + #print("options:%s"%d.get_ffdl_options()) + + options = d.get_ffdl_options() + options['version'] = self.version + print(self.version) + + self.start_downloads( options, add_books ) + + def update_existing(self): + if len(self.gui.library_view.get_selected_ids()) == 0: + return + #print("update_existing()") + + db = self.gui.current_db + book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) + #book_ids = self.gui.library_view.get_selected_ids() + + LoopProgressDialog(self.gui, + book_list, + partial(self._populate_book_from_calibre_id, db=self.gui.current_db), + self._update_existing_2, + init_label="Collecting stories for update...", + win_title="Get stories for updates", + status_prefix="URL retrieved") + + #books = self._convert_calibre_ids_to_books(db, book_ids) + #print("update books:%s"%books) + + def _update_existing_2(self,book_list): + + d = UpdateExistingDialog(self.gui, + 'Update Existing List', + prefs, + self.qaction.icon(), + book_list, + ) + d.exec_() + if d.result() != d.Accepted: + return + + update_books = d.get_books() + + #print("update_books:%s"%update_books) + #print("options:%s"%d.get_ffdl_options()) + # only if there's some good ones. + if 0 < len(filter(lambda x : x['good'], update_books)): + options = d.get_ffdl_options() + options['version'] = self.version + print(self.version) + self.start_downloads( options, update_books ) + + def get_urls_clip(self): + url_list = [] + if prefs['urlsfromclip']: + for url in unicode(QApplication.instance().clipboard().text()).split(): + if( self._is_good_downloader_url(url) ): + url_list.append(url) + return url_list + + def apply_settings(self): + # No need to do anything with perfs here, but we could. + prefs + + def start_downloads(self, options, books): + + #print("start_downloads:%s"%books) + + # create and pass temp dir. + tdir = PersistentTemporaryDirectory(prefix='fanfictiondownloader_') + options['tdir']=tdir + + self.gui.status_bar.show_message(_('Started fetching metadata for %s stories.'%len(books)), 3000) + + if 0 < len(filter(lambda x : x['good'], books)): + LoopProgressDialog(self.gui, + books, + partial(self.get_metadata_for_book, options = options), + partial(self.start_download_list, options = options)) + # LoopProgressDialog calls get_metadata_for_book for each 'good' story, + # get_metadata_for_book updates book for each, + # LoopProgressDialog calls start_download_list at the end which goes + # into the BG, or shows list if no 'good' books. + + def get_metadata_for_book(self,book, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + ''' + Update passed in book dict with metadata from website and + necessary data. To be called from LoopProgressDialog + 'loop'. Also pops dialogs for is adult, user/pass. + ''' + + # The current database shown in the GUI + # db is an instance of the class LibraryDatabase2 from database.py + # This class has many, many methods that allow you to do a lot of + # things. + db = self.gui.current_db + + fileform = options['fileform'] + collision = options['collision'] + updatemeta= options['updatemeta'] + + if not book['good']: + # book has already been flagged bad for whatever reason. 
+ return + + url = book['url'] + print("url:%s"%url) + skip_date_update = False + + ## was self.ffdlconfig, but we need to be able to change it + ## when doing epub update. + ffdlconfig = SafeConfigParser() + ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) + ffdlconfig.readfp(StringIO(prefs['personal.ini'])) + adapter = adapters.getAdapter(ffdlconfig,url) + + options['personal.ini'] = prefs['personal.ini'] + + ## three tries, that's enough if both user/pass & is_adult needed, + ## or a couple tries of one or the other + for x in range(0,2): + try: + adapter.getStoryMetadataOnly() + except exceptions.FailedToLogin: + print("Login Failed, Need Username/Password.") + userpass = UserPassDialog(self.gui,url) + userpass.exec_() # exec_ will make it act modal + if userpass.status: + adapter.username = userpass.user.text() + adapter.password = userpass.passwd.text() + + except exceptions.AdultCheckRequired: + if question_dialog(self.gui, 'Are You Adult?', '
<p>
      '+ + "%s requires that you be an adult. Please confirm you are an adult in your locale:"%url, + show_copy_button=False): + adapter.is_adult=True + + # let other exceptions percolate up. + story = adapter.getStoryMetadataOnly() + writer = writers.getWriter(options['fileform'],adapter.config,adapter) + + book['all_metadata'] = story.getAllMetadata(removeallentities=True) + book['title'] = story.getMetadata("title", removeallentities=True) + book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True) + book['publisher'] = story.getMetadata("site") + book['tags'] = writer.getTags() + book['comments'] = story.getMetadata("description") #, removeallentities=True) comments handles entities better. + book['series'] = story.getMetadata("series") + + # adapter.opener is the element with a threadlock. But del + # adapter.opener doesn't work--subproc fails when it tries + # to pull in the adapter object that hasn't been imported yet. + # book['adapter'] = adapter + + book['is_adult'] = adapter.is_adult + book['username'] = adapter.username + book['password'] = adapter.password + + book['icon'] = 'plus.png' + if story.getMetadataRaw('datePublished'): + # should only happen when an adapter is broken, but better to + # fail gracefully. + book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz) + book['timestamp'] = None # filled below if not skipped. + + if collision in (CALIBREONLY): + book['icon'] = 'metadata.png' + + # Dialogs should prevent this case now. + if collision in (UPDATE,UPDATEALWAYS) and fileform != 'epub': + raise NotGoingToDownload("Cannot update non-epub format.") + + book_id = None + + if book['calibre_id'] != None: + # updating an existing book. Update mode applies. + print("update existing id:%s"%book['calibre_id']) + book_id = book['calibre_id'] + # No handling needed: OVERWRITEALWAYS,CALIBREONLY + + # only care about collisions when not ADDNEW + elif collision != ADDNEW: + # 'new' book from URL. collision handling applies. + print("from URL") + + # find dups + mi = MetaInformation(story.getMetadata("title", removeallentities=True), + (story.getMetadata("author", removeallentities=True),)) # author is a list. + identicalbooks = db.find_identical_books(mi) + ## removed for being overkill. + # for ib in identicalbooks: + # # only *really* identical if URL matches, too. + # # XXX make an option? + # if self._get_story_url(db,ib) == url: + # identicalbooks.append(ib) + #print("identicalbooks:%s"%identicalbooks) + + if collision == SKIP and identicalbooks: + raise NotGoingToDownload("Skipping duplicate story.","list_remove.png") + + if len(identicalbooks) > 1: + raise NotGoingToDownload("More than one identical book--can't tell which to update/overwrite.","minusminus.png") + + if collision == CALIBREONLY and not identicalbooks: + raise NotGoingToDownload("Not updating Calibre Metadata, no existing book to update.","search_delete_saved.png") + + if len(identicalbooks)>0: + book_id = identicalbooks.pop() + book['calibre_id'] = book_id + book['icon'] = 'edit-redo.png' + + if book_id != None and collision != ADDNEW: + if options['collision'] in (CALIBREONLY): + book['comment'] = 'Metadata collected.' + # don't need temp file created below. + return + + ## newer/chaptercount checks are the same for both: + # Update epub, but only if more chapters. + if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE + # 'book' can exist without epub. If there's no existing epub, + # let it go and it will download it. 
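+            # (Editor's summary, hedged--distilled from the checks below, not
+            # a spec: UPDATE raises NotGoingToDownload when the epub already
+            # has every chapter; UPDATEALWAYS redownloads but sets
+            # skip_date_update so the existing timestamp is left alone; a web
+            # chapter count *below* the epub's always refuses and points the
+            # user at Overwrite.)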
+ if db.has_format(book_id,fileform,index_is_id=True): + toupdateio = StringIO() + (epuburl,chaptercount) = doMerge(toupdateio, + [StringIO(db.format(book_id,'EPUB', + index_is_id=True))], + titlenavpoints=False, + striptitletoc=True, + forceunique=False) + urlchaptercount = int(story.getMetadata('numChapters')) + if chaptercount == urlchaptercount: + if collision == UPDATE: + raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png') + else: + # UPDATEALWAYS + skip_date_update = True + elif chaptercount > urlchaptercount: + raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png') + + if collision == OVERWRITE and \ + db.has_format(book_id,formmapping[fileform],index_is_id=True): + # check make sure incoming is newer. + lastupdated=story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.fromtimestamp(os.stat(db.format_abspath(book_id, formmapping[fileform], index_is_id=True))[8]).date() + if fileupdated > lastupdated: + raise NotGoingToDownload("Not Overwriting, web site is not newer.",'edit-undo.png') + + # For update, provide a tmp file copy of the existing epub so + # it can't change underneath us. + if collision in (UPDATE,UPDATEALWAYS) and \ + db.has_format(book['calibre_id'],'EPUB',index_is_id=True): + tmp = PersistentTemporaryFile(prefix='old-%s-'%book['calibre_id'], + suffix='.epub', + dir=options['tdir']) + db.copy_format_to(book_id,fileform,tmp,index_is_id=True) + print("existing epub tmp:"+tmp.name) + book['epub_for_update'] = tmp.name + + if collision != CALIBREONLY and not skip_date_update: + # I'm half convinced this should be dateUpdated instead, but + # this behavior matches how epubs come out when imported + # dateCreated == packaged--epub/etc created. + book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz) + + if book['good']: # there shouldn't be any !'good' books at this point. + # if still 'good', make a temp file to write the output to. + tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'], + suffix='.'+options['fileform'], + dir=options['tdir']) + print("title:"+book['title']) + print("outfile:"+tmp.name) + book['outfile'] = tmp.name + + return + + def start_download_list(self,book_list, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + ''' + Called by LoopProgressDialog to start story downloads BG processing. + adapter_list is a list of tuples of (url,adapter) + ''' + #print("start_download_list:book_list:%s"%book_list) + + ## No need to BG process when CALIBREONLY! Fake it. + if options['collision'] in (CALIBREONLY): + class NotJob(object): + def __init__(self,result): + self.failed=False + self.result=result + notjob = NotJob(book_list) + self.download_list_completed(notjob,options=options) + return + + for book in book_list: + if book['good']: + break + else: + ## No good stories to try to download, go straight to + ## list. + d = DisplayStoryListDialog(self.gui, + 'Nothing to Download', + prefs, + self.qaction.icon(), + book_list, + label_text='None of the URLs/stories given can be/need to be downloaded.' 
+ ) + d.exec_() + return + + func = 'arbitrary_n' + cpus = self.gui.job_manager.server.pool_size + args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker', + (book_list, options, cpus)] + desc = 'Download FanFiction Book' + job = self.gui.job_manager.run_job( + self.Dispatcher(partial(self.download_list_completed,options=options)), + func, args=args, + description=desc) + + self.gui.status_bar.show_message('Starting %d FanFictionDownLoads'%len(book_list),3000) + + def _update_book(self,book,db=None, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + print("add/update %s %s"%(book['title'],book['url'])) + mi = self._make_mi_from_book(book) + + if options['collision'] != CALIBREONLY: + self._add_or_update_book(book,options,prefs,mi) + + if options['collision'] == CALIBREONLY or \ + (options['updatemeta'] and book['good']): + self._update_metadata(db, book['calibre_id'], book, mi) + + def _update_books_completed(self, book_list, options={}): + + add_list = filter(lambda x : x['good'] and x['added'], book_list) + update_list = filter(lambda x : x['good'] and not x['added'], book_list) + update_ids = [ x['calibre_id'] for x in update_list ] + + if len(add_list): + ## even shows up added to searchs. Nice. + self.gui.library_view.model().books_added(len(add_list)) + + if update_ids: + self.gui.library_view.model().refresh_ids(update_ids) + + current = self.gui.library_view.currentIndex() + self.gui.library_view.model().current_changed(current, self.previous) + self.gui.tags_view.recount() + + self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.'%(len(update_list) + len(add_list))), 3000) + + if len(update_list) + len(add_list) != len(book_list): + d = DisplayStoryListDialog(self.gui, + 'Updates completed, final status', + prefs, + self.qaction.icon(), + book_list, + label_text='Stories have be added or updated in Calibre, some had additional problems.' + ) + d.exec_() + + print("all done, remove temp dir.") + remove_dir(options['tdir']) + + def download_list_completed(self, job, options={}): + if job.failed: + self.gui.job_exception(job, dialog_title='Failed to Download Stories') + return + + self.previous = self.gui.library_view.currentIndex() + db = self.gui.current_db + + if display_story_list(self.gui, + 'Downloads finished, confirm to update Calibre', + prefs, + self.qaction.icon(), + job.result, + label_text='Stories will not be added or updated in Calibre without confirmation.', + offer_skip=True): + + book_list = job.result + good_list = filter(lambda x : x['good'], book_list) + total_good = len(good_list) + + self.gui.status_bar.show_message(_('Adding/Updating %s books.'%total_good)) + + if total_good > 0: + LoopProgressDialog(self.gui, + good_list, + partial(self._update_book, options=options, db=self.gui.current_db), + partial(self._update_books_completed, options=options), + init_label="Updating calibre for stories...", + win_title="Update calibre for stories", + status_prefix="Updated") + + def _add_or_update_book(self,book,options,prefs,mi=None): + db = self.gui.current_db + + if mi == None: + mi = self._make_mi_from_book(book) + + book_id = book['calibre_id'] + if book_id == None: + book_id = db.create_book_entry(mi, + add_duplicates=True) + book['calibre_id'] = book_id + book['added'] = True + else: + book['added'] = False + + if not db.add_format_with_hooks(book_id, + options['fileform'], + book['outfile'], index_is_id=True): + book['comment'] = "Adding format to book failed for some reason..." 
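+            # (Editor's aside, hedged: judging from the 'if not ...' test
+            # above, db.add_format_with_hooks() signals failure with a falsy
+            # return; the comment/good/icon trio set here is presumably what
+            # the results dialog later shows for this row.)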
+ book['good']=False + book['icon']='dialog_error.png' + + if prefs['deleteotherforms']: + fmts = db.formats(book['calibre_id'], index_is_id=True).split(',') + for fmt in fmts: + if fmt != formmapping[options['fileform']]: + print("remove f:"+fmt) + db.remove_format(book['calibre_id'], fmt, index_is_id=True)#, notify=False + + if prefs['addtolists'] or prefs['addtoreadlists']: + self._update_reading_lists([book_id],add=True) + + return book_id + + def _update_metadata(self, db, book_id, book, mi): + if prefs['keeptags']: + old_tags = db.get_tags(book_id) + # remove old Completed/In-Progress only if there's a new one. + if 'Completed' in mi.tags or 'In-Progress' in mi.tags: + old_tags = filter( lambda x : x not in ('Completed', 'In-Progress'), old_tags) + # remove old Last Update tags if there are new ones. + if len(filter( lambda x : not x.startswith("Last Update"), mi.tags)) > 0: + old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags) + # mi.tags needs to be list, but set kills dups. + mi.tags = list(set(list(old_tags)+mi.tags)) + + if 'langcode' in book['all_metadata']: + mi.languages=[book['all_metadata']['langcode']] + else: + # Set language english, but only if not already set. + oldmi = db.get_metadata(book_id,index_is_id=True) + if not oldmi.languages: + mi.languages=['eng'] + + db.set_metadata(book_id,mi) + + # do configured column updates here. + #print("all_metadata: %s"%book['all_metadata']) + custom_columns = self.gui.library_view.model().custom_columns + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) + for col, meta in prefs['custom_cols'].iteritems(): + #print("setting %s to %s"%(col,meta)) + if col not in custom_columns: + print("%s not an existing column, skipping."%col) + continue + coldef = custom_columns[col] + if not meta.startswith('status-') and meta not in book['all_metadata']: + print("No value for %s, skipping."%meta) + continue + if meta not in permitted_values[coldef['datatype']]: + print("%s not a valid column type for %s, skipping."%(col,meta)) + continue + label = coldef['label'] + if coldef['datatype'] in ('enumeration','text','comments','datetime','series'): + db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False) + elif coldef['datatype'] in ('int','float'): + num = unicode(book['all_metadata'][meta]).replace(",","") + db.set_custom(book_id, num, label=label, commit=False) + elif coldef['datatype'] == 'bool' and meta.startswith('status-'): + if meta == 'status-C': + val = book['all_metadata']['status'] == 'Completed' + if meta == 'status-I': + val = book['all_metadata']['status'] == 'In-Progress' + db.set_custom(book_id, val, label=label, commit=False) + + db.commit() + + def _get_clean_reading_lists(self,lists): + if lists == None or lists.strip() == "" : + return [] + else: + return filter( lambda x : x, map( lambda x : x.strip(), lists.split(',') ) ) + + def _update_reading_lists(self,book_ids,add=True): + try: + rl_plugin = self.gui.iactions['Reading List'] + except: + if prefs['addtolists'] or prefs['addtoreadlists']: + message="
<p>You configured FanFictionDownLoader to automatically update Reading Lists, but you don't have the Reading List plugin installed anymore?</p>
      " + confirm(message,'fanfictiondownloader_no_reading_list_plugin', self.gui) + return + + # XXX check for existence of lists, warning if not. + if prefs['addtoreadlists']: + if add: + addremovefunc = rl_plugin.add_books_to_list + else: + addremovefunc = rl_plugin.remove_books_from_list + + lists = self._get_clean_reading_lists(prefs['read_lists']) + if len(lists) < 1 : + message="
<p>You configured FanFictionDownLoader to automatically update \"To Read\" Reading Lists, but you don't have any lists set?</p>
      " + confirm(message,'fanfictiondownloader_no_read_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("add good read l:(%s)"%l) + addremovefunc(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="
<p>You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?</p>
      "%l + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + if prefs['addtolists'] and (add or (prefs['addtolistsonread'] and prefs['addtoreadlists']) ): + lists = self._get_clean_reading_lists(prefs['send_lists']) + if len(lists) < 1 : + message="
<p>You configured FanFictionDownLoader to automatically update \"Send to Device\" Reading Lists, but you don't have any lists set?</p>
      " + confirm(message,'fanfictiondownloader_no_send_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("good send l:(%s)"%l) + rl_plugin.add_books_to_list(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="
<p>You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?</p>
      "%l + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + def _find_existing_book_id(self,db,book,matchurl=True): + mi = MetaInformation(book["title"],(book["author"],)) # author is a list. + identicalbooks = db.find_identical_books(mi) + if matchurl: # only *really* identical if URL matches, too. + for ib in identicalbooks: + if self._get_story_url(db,ib) == book['url']: + return ib + if identicalbooks: + return identicalbooks.pop() + return None + + def _make_mi_from_book(self,book): + mi = MetaInformation(book['title'],(book['author'],)) # author is a list. + mi.set_identifiers({'url':book['url']}) + mi.publisher = book['publisher'] + mi.tags = book['tags'] + #mi.languages = ['en'] # handled in _update_metadata so it can check for existing lang. + mi.pubdate = book['pubdate'] + mi.timestamp = book['timestamp'] + mi.comments = book['comments'] + mi.series = book['series'] + return mi + + + def _convert_urls_to_books(self, urls): + books = [] + uniqueurls = set() + for url in urls: + book = self._convert_url_to_book(url) + if book['url'] in uniqueurls: + book['good'] = False + book['comment'] = "Same story already included." + uniqueurls.add(book['url']) + books.append(book) + return books + + def _convert_url_to_book(self, url): + book = {} + book['good'] = True + book['calibre_id'] = None + book['title'] = 'Unknown' + book['author'] = 'Unknown' + book['author_sort'] = 'Unknown' + + book['comment'] = '' + book['url'] = '' + book['added'] = False + + self._set_book_url_and_comment(book,url) + return book + + def _convert_id_to_book(self, idval, good=True): + book = {} + book['good'] = good + book['calibre_id'] = idval + book['title'] = 'Unknown' + book['author'] = 'Unknown' + book['author_sort'] = 'Unknown' + + book['comment'] = '' + book['url'] = '' + book['added'] = False + + return book + + + # def _convert_calibre_ids_to_books(self, db, ids): + # books = [] + # for book_id in ids: + # books.append(self._convert_calibre_id_to_book(db,book_id)) + # return books + + def _populate_book_from_calibre_id(self, book, db=None): + mi = db.get_metadata(book['calibre_id'], index_is_id=True) + #book = {} + book['good'] = True + book['calibre_id'] = mi.id + book['title'] = mi.title + book['author'] = authors_to_string(mi.authors) + book['author_sort'] = mi.author_sort + book['comment'] = '' + book['url'] = "" + book['added'] = False + + url = self._get_story_url(db,book['calibre_id']) + self._set_book_url_and_comment(book,url) + #return book + + def _set_book_url_and_comment(self,book,url): + if not url: + book['comment'] = "No story URL found." + book['good'] = False + book['icon'] = 'search_delete_saved.png' + else: + # get normalized url or None. + book['url'] = self._is_good_downloader_url(url) + if book['url'] == None: + book['url'] = url + book['comment'] = "URL is not a valid story URL." + book['good'] = False + book['icon']='dialog_error.png' + + def _get_story_url(self, db, book_id): + identifiers = db.get_identifiers(book_id,index_is_id=True) + if 'url' in identifiers: + # identifiers have :->| in url. + #print("url from book:"+identifiers['url'].replace('|',':')) + return identifiers['url'].replace('|',':') + else: + ## only epub has URL in it--at least where I can easily find it. 
+        if db.has_format(book_id,'EPUB',index_is_id=True):
+            existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
+            mi = get_metadata(existingepub,'EPUB')
+            identifiers = mi.get_identifiers()
+            if 'url' in identifiers:
+                #print("url from epub:"+identifiers['url'].replace('|',':'))
+                return identifiers['url'].replace('|',':')
+            # look for dc:source
+            return get_dcsource(existingepub)
+        return None
+
+    def _is_good_downloader_url(self,url):
+        # this is the accepted way to 'check for existence'? really?
+        try:
+            self.dummyconfig
+        except AttributeError:
+            self.dummyconfig = SafeConfigParser()
+        # pulling up an adapter is pretty low overhead.  If
+        # it fails, it's a bad url.
+        try:
+            adapter = adapters.getAdapter(self.dummyconfig,url)
+            url = adapter.url
+            del adapter
+            return url
+        except:
+            return None
+
+def get_url_list(urls):
+    def f(x):
+        if x.strip(): return True
+        else: return False
+    # set removes dups.
+    return set(filter(f,urls.strip().splitlines()))
+
diff --git a/calibre-plugin/images/icon.png b/calibre-plugin/images/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9715307dd4fe35c686b222262b796828856ff22
GIT binary patch
literal 24649
[... base85-encoded binary image data elided ...]
literal 0
HcmV?d00001

diff --git a/calibre-plugin/images/icon.xcf b/calibre-plugin/images/icon.xcf
new file mode 100644
index 0000000000000000000000000000000000000000..76d7c0c9aeb81d6cf0b6df17f8bc7e6c60a59e00
GIT binary patch
literal 63927
[... base85-encoded binary image data elided ...]
[GIT binary patch literal data elided]

diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py
new file mode 100644
index 00000000..33372c50
--- /dev/null
+++ b/calibre-plugin/jobs.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Jim Miller'
+__copyright__ = '2011, Grant Drake '
+__docformat__ = 'restructuredtext en'
+
+import time, os, traceback
+
+from ConfigParser import SafeConfigParser
+from StringIO import StringIO
+#from itertools import izip
+#from threading import Event
+
+#from calibre.gui2.convert.single import sort_formats_by_preference
+from calibre.utils.ipc.server import Server
+from calibre.utils.ipc.job import ParallelJob
+from calibre.utils.logging import Log
+
+from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
+    OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
+from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
+from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge
+
+# ------------------------------------------------------------------------------
+#
+# Functions to perform downloads using worker jobs
+#
+# ------------------------------------------------------------------------------
+
+def do_download_worker(book_list, options,
+                       cpus, notification=lambda x,y:x):
+    '''
+    Master job, to launch child jobs to download stories for a set of books.
+    This is run as a worker job in the background to keep the UI more
+    responsive and get around the memory leak issues as it will launch
+    a child job for each book as a worker process
+    '''
+    server = Server(pool_size=cpus)
+
+    print(options['version'])
+    total = 0
+    # Queue all the jobs
+    print("Adding jobs for URLs:")
+    for book in book_list:
+        if book['good']:
+            print("%s"%book['url'])
+            total += 1
+            args = ['calibre_plugins.fanfictiondownloader_plugin.jobs',
+                    'do_download_for_worker',
+                    (book,options)]
+            job = ParallelJob('arbitrary',
+                              "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
+                              done=None,
+                              args=args)
+            job._book = book
+            # job._book_id = book_id
+            # job._title = title
+            # job._modified_date = modified_date
+            # job._existing_isbn = existing_isbn
+            server.add_job(job)
+
+    # This server is an arbitrary_n job, so there is a notifier available.
+    # Set the % complete to a small number to avoid the 'unavailable' indicator
+    notification(0.01, 'Downloading FanFiction Stories')
+
+    # dequeue the job results as they arrive, saving the results
+    count = 0
+    while True:
+        job = server.changed_jobs_queue.get()
+        # A job can 'change' when it is not finished, for example if it
+        # produces a notification. Ignore these.
+        job.update()
+        if not job.is_finished:
+            continue
+        # A job really finished. Get the information.
+        output_book = job.result
+        #print("output_book:%s"%output_book)
+        book_list.remove(job._book)
+        book_list.append(job.result)
+        book_id = job._book['calibre_id']
+        #title = job._title
+        count = count + 1
+        notification(float(count)/total, 'Downloaded Story')
+        # Add this job's output to the current log
+        print('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
+        print(job.details)
+
+        if count >= total:
+            # All done!
+            break
+
+    server.close()
+
+    # return the book list as the job result
+    return book_list
+
+def do_download_for_worker(book,options):
+    '''
+    Child job, to download the story for this specific book,
+    when run as a worker job
+    '''
+    try:
+        book['comment'] = 'Download started...'
+
+        ffdlconfig = SafeConfigParser()
+        ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
+        ffdlconfig.readfp(StringIO(options['personal.ini']))
+
+        adapter = adapters.getAdapter(ffdlconfig,book['url'])
+        adapter.is_adult = book['is_adult']
+        adapter.username = book['username']
+        adapter.password = book['password']
+
+        story = adapter.getStoryMetadataOnly()
+        writer = writers.getWriter(options['fileform'],adapter.config,adapter)
+
+        outfile = book['outfile']
+
+        ## No need to download at all.  Shouldn't ever get down here.
+        if options['collision'] in (CALIBREONLY,):
+            print("Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening...")
+            book['comment'] = 'Metadata collected.'
+
+        ## checks were done earlier, it's new or not dup or newer--just write it.
+        elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
+                ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)):
+            print("write to %s"%outfile)
+            writer.writeStory(outfilename=outfile, forceOverwrite=True)
+            book['comment'] = 'Download %s completed, %s chapters.'%(options['fileform'],story.getMetadata("numChapters"))
+
+        ## checks were done earlier, just update it.
+        elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
+
+            urlchaptercount = int(story.getMetadata('numChapters'))
+            ## First, get existing epub with titlepage and tocpage stripped.
+            updateio = StringIO()
+            (epuburl,chaptercount) = doMerge(updateio,
+                                             [book['epub_for_update']],
+                                             titlenavpoints=False,
+                                             striptitletoc=True,
+                                             forceunique=False)
+            print("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount))
+            print("write to %s"%outfile)

+            ## Get updated title page/metadata by itself in an epub.
+            ## Even if the title page isn't included, this carries the metadata.
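+            ## From here the updated epub is assembled from three in-memory epubs:
+            ##   titleio       - fresh title page/metadata only (written just below)
+            ##   updateio      - the existing epub, title/toc pages stripped (built above)
+            ##   newchaptersio - chapters chaptercount+1..urlchaptercount, fetched only
+            ##                   when the site has more chapters than the local epub
+            ## doMerge(fromfirst=True) then takes the book metadata from titleio.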
+ titleio = StringIO() + writer.writeStory(outstream=titleio,metaonly=True) + + newchaptersio = None + if urlchaptercount > chaptercount : + ## Go get the new chapters + newchaptersio = StringIO() + adapter.setChaptersRange(chaptercount+1,urlchaptercount) + + adapter.config.set("overrides",'include_tocpage','false') + adapter.config.set("overrides",'include_titlepage','false') + writer.writeStory(outstream=newchaptersio) + + ## Merge the three epubs together. + doMerge(outfile, + [titleio,updateio,newchaptersio], + fromfirst=True, + titlenavpoints=False, + striptitletoc=False, + forceunique=False) + + book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\ + (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + book['icon']='dialog_error.png' + print("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + #time.sleep(10) + return book diff --git a/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt b/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt new file mode 100644 index 00000000..e69de29b diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..e72999f4 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,10 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 2 hours + +# There's a bug in the Python 2.7 runtime that prevents this from +# working properly. In theory, there should never be orphans anyway. +#- description: orphan cleanup job +# url: /r3m0v3rOrphans +# schedule: every 4 hours diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..eae546b7 --- /dev/null +++ b/css/index.css @@ -0,0 +1,73 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 60%; + margin-left: 20%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ +# margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { +// display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + color: #f00; +} +.recent { + font-size: large; +} diff --git a/defaults.ini b/defaults.ini new file mode 100644 index 00000000..5b623f97 --- /dev/null +++ b/defaults.ini @@ -0,0 +1,363 @@ +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +[defaults] + +## [defaults] section applies to all formats and sites but may be +## overridden at several levels + +## All available titlepage_entries and the label used for them: +## _label:
[Markup-stripped HTML fragment: the web front-end's "Edit Config" page template, with a FanFictionDownLoader header, "Editing configuration for {{ nickname }}", a "Default System configuration" section showing {{ defaultsini }}, and a footer ("Powered by Google App Engine", "This is a web front-end to FanFictionDownLoader", "Copyright © Fanficdownloader team"). The rest of defaults.ini and the intervening diff headers were lost in extraction.]
diff --git a/epubmerge.py b/epubmerge.py
new file mode 100644
index 00000000..7c83fdc8
--- /dev/null
+++ b/epubmerge.py
@@ -0,0 +1,389 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# epubmerge.py 1.0
+
+# Copyright 2011, Jim Miller
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import re
+#import StringIO
+from optparse import OptionParser
+
+import zlib
+import zipfile
+from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
+from time import time
+
+from exceptions import KeyError
+
+from xml.dom.minidom import parse, parseString, getDOMImplementation
+
+def main(argv):
+    # read in args, anything starting with -- will be treated as --<variable>=<value>
+    usage = "usage: %prog [options] <input epub> [...]"
+    parser = OptionParser(usage)
+    parser.add_option("-o", "--output", dest="outputopt", default="merge.epub",
+                      help="Set OUTPUT file, Default: merge.epub", metavar="OUTPUT")
+    parser.add_option("-t", "--title", dest="titleopt", default=None,
+                      help="Use TITLE as the metadata title.  Default: '<first epub title> Anthology'", metavar="TITLE")
+    parser.add_option("-d", "--description", dest="descopt", default=None,
+                      help="Use DESC as the metadata description.  Default: '<epub title> by <author>' for each epub.", metavar="DESC")
+    parser.add_option("-a", "--author",
+                      action="append", dest="authoropts", default=[],
+                      help="Use AUTHOR as a metadata author, multiple authors may be given, Default: <All authors from epubs>", metavar="AUTHOR")
+    parser.add_option("-f", "--first",
+                      action="store_true", dest="fromfirst", default=False,
+                      help="Take all metadata from first input epub",)
+    parser.add_option("-n", "--titles-in-toc",
+                      action="store_true", dest="titlenavpoints",
+                      help="Put an entry in the TOC for each epub, in addition to each epub's chapters.",)
+    parser.add_option("-s", "--strip-title-toc",
+                      action="store_true", dest="striptitletoc",
+                      help="Strip any title_page.xhtml and toc_page.xhtml files.",)
+
+    (options, args) = parser.parse_args()
+
+    ## Add .epub if not already there.
+    if not options.outputopt.lower().endswith(".epub"):
+        options.outputopt=options.outputopt+".epub"
+
+    print "output file: "+options.outputopt
+    doMerge(options.outputopt,
+            args,
+            options.authoropts,
+            options.titleopt,
+            options.descopt,
+            options.fromfirst,
+            options.titlenavpoints,
+            options.striptitletoc)
+
+    # output = StringIO.StringIO()
+    # files = []
+    # for file in args:
+    #     f = open(file,"rb")
+    #     fio = StringIO.StringIO(f.read())
+    #     f.close()
+    #     files.append(fio)
+
+    # doMerge(output,files,authoropts,titleopt,descopt,fromfirst,titlenavpoints,striptitletoc)
+
+    # out = open(outputopt,"wb")
+    # out.write(output.getvalue())
+
+def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None,
+            fromfirst=False,
+            titlenavpoints=True,
+            striptitletoc=False,
+            forceunique=True):
+    '''
+    outputio = output file name or StringIO.
+    files = list of input file names or StringIOs.
+    authoropts = list of authors to use, otherwise add from all input
+    titleopt = title, otherwise '<first epub title> Anthology'
+    descopt = description, otherwise '<title> by <author>' list for all input
+    fromfirst if true, take all metadata (including author, title, desc) from first input
+    titlenavpoints if true, put in a new TOC entry for each epub
+    striptitletoc if true, strip out any (title|toc)_page.xhtml files
+    forceunique if true, guarantee uniqueness of contents by adding a dir for each input
+    '''
+    ## Python 2.5 ZipFile is rather more primitive than later
+    ## versions.  It can operate on a file, or on a StringIO, but
+    ## not on an open stream.  OTOH, I suspect we would have had
+    ## problems with closing and opening again to change the
+    ## compression type anyway.
+
+    filecount=0
+    source=None
+
+    ## Write mimetype file, must be first and uncompressed.
+    ## Older versions of python(2.4/5) don't allow you to specify
+    ## compression by individual file.
+    ## Overwrite if existing output file.
+    outputepub = ZipFile(outputio, "w", compression=ZIP_STORED)
+    outputepub.debug = 3
+    outputepub.writestr("mimetype", "application/epub+zip")
+    outputepub.close()
+
+    ## Re-open file for content.
+    outputepub = ZipFile(outputio, "a", compression=ZIP_DEFLATED)
+    outputepub.debug = 3
+
+    ## Create META-INF/container.xml file.  The only thing it does is
+    ## point to content.opf
+    containerdom = getDOMImplementation().createDocument(None, "container", None)
+    containertop = containerdom.documentElement
+    containertop.setAttribute("version","1.0")
+    containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")
+    rootfiles = containerdom.createElement("rootfiles")
+    containertop.appendChild(rootfiles)
+    rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf",
+                                                          "media-type":"application/oebps-package+xml"}))
+    outputepub.writestr("META-INF/container.xml",containerdom.toprettyxml(indent='   ',encoding='utf-8'))
+
+    ## Process input epubs.
+
+    items = [] # list of (id, href, type) tuples(all strings) -- From .opfs' manifests
+    items.append(("ncx","toc.ncx","application/x-dtbncx+xml")) ## we'll generate the toc.ncx file,
+                                                               ## but it needs to be in the items manifest.
+    itemrefs = [] # list of strings -- idrefs from .opfs' spines
+    navmaps = [] # list of navMap DOM elements -- TOC data for each from toc.ncx files
+
+    booktitles = [] # list of strings -- Each book's title
+    allauthors = [] # list of lists of strings -- Each book's list of authors.
+
+    filelist = []
+
+    booknum=1
+    firstmetadom = None
+    for file in files:
+        if file == None : continue
+
+        book = "%d" % booknum
+        bookdir = ""
+        bookid = ""
+        if forceunique:
+            bookdir = "%d/" % booknum
+            bookid = "a%d" % booknum
+        #print "book %d" % booknum
+
+        epub = ZipFile(file, 'r')
+
+        ## Find the .opf file.
+        container = epub.read("META-INF/container.xml")
+        containerdom = parseString(container)
+        rootfilenodelist = containerdom.getElementsByTagName("rootfile")
+        rootfilename = rootfilenodelist[0].getAttribute("full-path")
+
+        ## Save the path to the .opf file--hrefs inside it are relative to it.
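+        ## e.g. rootfilename "OEBPS/content.opf" gives relpath "OEBPS/", so a
+        ## manifest href of "chapter1.xhtml" is read as "OEBPS/chapter1.xhtml".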
+ relpath = os.path.dirname(rootfilename) + if( len(relpath) > 0 ): + relpath=relpath+"/" + + metadom = parseString(epub.read(rootfilename)) + if booknum==1: + firstmetadom = metadom.getElementsByTagName("metadata")[0] + try: + source=firstmetadom.getElementsByTagName("dc:source")[0].firstChild.data.encode("utf-8") + except: + source="" + #print "Source:%s"%source + + ## Save indiv book title + booktitles.append(metadom.getElementsByTagName("dc:title")[0].firstChild.data) + + ## Save authors. + authors=[] + for creator in metadom.getElementsByTagName("dc:creator"): + if( creator.getAttribute("opf:role") == "aut" ): + authors.append(creator.firstChild.data) + allauthors.append(authors) + + for item in metadom.getElementsByTagName("item"): + if( item.getAttribute("media-type") == "application/x-dtbncx+xml" ): + # TOC file is only one with this type--as far as I know. + # grab the whole navmap, deal with it later. + tocdom = parseString(epub.read(relpath+item.getAttribute("href"))) + + for navpoint in tocdom.getElementsByTagName("navPoint"): + navpoint.setAttribute("id",bookid+navpoint.getAttribute("id")) + + for content in tocdom.getElementsByTagName("content"): + content.setAttribute("src",bookdir+relpath+content.getAttribute("src")) + + navmaps.append(tocdom.getElementsByTagName("navMap")[0]) + else: + id=bookid+item.getAttribute("id") + href=bookdir+relpath+item.getAttribute("href") + href=href.encode('utf8') + #print "href:"+href + if not striptitletoc or not re.match(r'.*/(title|toc)_page\.xhtml', + item.getAttribute("href")): + if href not in filelist: + try: + outputepub.writestr(href, + epub.read(relpath+item.getAttribute("href"))) + if re.match(r'.*/(file|chapter)\d+\.x?html',href): + filecount+=1 + items.append((id,href,item.getAttribute("media-type"))) + filelist.append(href) + except KeyError, ke: + pass # Skip missing files. + + for itemref in metadom.getElementsByTagName("itemref"): + + if not striptitletoc or not re.match(r'(title|toc)_page', itemref.getAttribute("idref")): + itemrefs.append(bookid+itemref.getAttribute("idref")) + + booknum=booknum+1; + if not forceunique: + # If not forceunique, it's an epub update. + # If there's a "calibre_bookmarks.txt", it's from reading + # in Calibre and should be preserved. + try: + fn = "META-INF/calibre_bookmarks.txt" + outputepub.writestr(fn,epub.read(fn)) + except: + pass + + + ## create content.opf file. + uniqueid="epubmerge-uid-%d" % time() # real sophisticated uid scheme. 
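+    ## This uid becomes the dtb:uid meta in the generated toc.ncx and (unless
+    ## fromfirst metadata is used) the dc:identifier in content.opf below.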
+ contentdom = getDOMImplementation().createDocument(None, "package", None) + package = contentdom.documentElement + if fromfirst and firstmetadom: + metadata = firstmetadom + firstpackage = firstmetadom.parentNode + package.setAttribute("version",firstpackage.getAttribute("version")) + package.setAttribute("xmlns",firstpackage.getAttribute("xmlns")) + package.setAttribute("unique-identifier",firstpackage.getAttribute("unique-identifier")) + else: + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","epubmerge-id") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + metadata.appendChild(newTag(contentdom,"dc:identifier",text=uniqueid,attrs={"id":"epubmerge-id"})) + if( titleopt is None ): + titleopt = booktitles[0]+" Anthology" + metadata.appendChild(newTag(contentdom,"dc:title",text=titleopt)) + + # If cmdline authors, use those instead of those collected from the epubs + # (allauthors kept for TOC & description gen below. + if( len(authoropts) > 1 ): + useauthors=[authoropts] + else: + useauthors=allauthors + + usedauthors=dict() + for authorlist in useauthors: + for author in authorlist: + if( not usedauthors.has_key(author) ): + usedauthors[author]=author + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=author)) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="epubmerge",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="Copyrights as per source stories")) + metadata.appendChild(newTag(contentdom,"dc:language",text="en")) + + if not descopt: + # created now, but not filled in until TOC generation to save loops. + description = newTag(contentdom,"dc:description",text="Anthology containing:\n") + else: + description = newTag(contentdom,"dc:description",text=descopt) + metadata.appendChild(description) + + package.appendChild(metadata) + + manifest = contentdom.createElement("manifest") + package.appendChild(manifest) + for item in items: + (id,href,type)=item + manifest.appendChild(newTag(contentdom,"item", + attrs={'id':id, + 'href':href, + 'media-type':type})) + + spine = newTag(contentdom,"spine",attrs={"toc":"ncx"}) + package.appendChild(spine) + for itemref in itemrefs: + spine.appendChild(newTag(contentdom,"itemref", + attrs={"idref":itemref, + "linear":"yes"})) + + ## create toc.ncx file + tocncxdom = getDOMImplementation().createDocument(None, "ncx", None) + ncx = tocncxdom.documentElement + ncx.setAttribute("version","2005-1") + ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/") + head = tocncxdom.createElement("head") + ncx.appendChild(head) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:uid", "content":uniqueid})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:depth", "content":"1"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:totalPageCount", "content":"0"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:maxPageNumber", "content":"0"})) + + docTitle = tocncxdom.createElement("docTitle") + docTitle.appendChild(newTag(tocncxdom,"text",text=titleopt)) + ncx.appendChild(docTitle) + + tocnavMap = tocncxdom.createElement("navMap") + ncx.appendChild(tocnavMap) + + ## TOC navPoints can be nested, but this flattens them for + ## simplicity, plus adds a navPoint for each epub. 
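+    ## e.g. with titlenavpoints on, the merged navMap comes out flat:
+    ##   Story One by Author A    (copied first navPoint, id prefixed "book")
+    ##   Chapter 1
+    ##   Chapter 2
+    ##   Story Two by Author B
+    ##   ...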
+ booknum=0 + for navmap in navmaps: + navpoints = navmap.getElementsByTagName("navPoint") + if titlenavpoints: + ## Copy first navPoint of each epub, give a different id and + ## text: bookname by authorname + newnav = navpoints[0].cloneNode(True) + newnav.setAttribute("id","book"+newnav.getAttribute("id")) + ## For purposes of TOC titling & desc, use first book author + newtext = newTag(tocncxdom,"text",text=booktitles[booknum]+" by "+allauthors[booknum][0]) + text = newnav.getElementsByTagName("text")[0] + text.parentNode.replaceChild(newtext,text) + tocnavMap.appendChild(newnav) + + if not descopt and not fromfirst: + description.appendChild(contentdom.createTextNode(booktitles[booknum]+" by "+allauthors[booknum][0]+"\n")) + + for navpoint in navpoints: + #print "navpoint:%s"%navpoint.getAttribute("id") + if not striptitletoc or not re.match(r'(title|toc)_page',navpoint.getAttribute("id")): + tocnavMap.appendChild(navpoint) + booknum=booknum+1; + + ## Force strict ordering of playOrder + playorder=1 + for navpoint in tocncxdom.getElementsByTagName("navPoint"): + navpoint.setAttribute("playOrder","%d" % playorder) + if( not navpoint.getAttribute("id").startswith("book") ): + playorder = playorder + 1 + + ## content.opf written now due to description being filled in + ## during TOC generation to save loops. + outputepub.writestr("content.opf",contentdom.toxml('utf-8')) + outputepub.writestr("toc.ncx",tocncxdom.toxml('utf-8')) + + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 + outputepub.close() + + return (source,filecount) + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/example.ini b/example.ini new file mode 100644 index 00000000..67392708 --- /dev/null +++ b/example.ini @@ -0,0 +1,40 @@ +## This is an example of what your personal configuration might look +## like. + +[defaults] +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## Most common, I expect will be using this to save username/passwords +## for different sites. +[www.twilighted.net] +#username:YourPenname +#password:YourPassword + +[www.ficwad.com] +#username:YourUsername +#password:YourPassword + +[www.adastrafanfic.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. +#is_adult:true + +## The [defaults] section here will override the system [defaults], +## but not format, site for site:format sections. +[defaults] +## Directories only useful in commandline or zip files. +#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext} +#output_filename: books/${site}/${authorId}/${title}-${storyId}${formatext} + +## For example, zip_output here will turn on zip for html and txt, but +## not epub because the system [epub] section explicitly says +## zip_output: false (epubs *are* specially formated zip files.) 
+#zip_output: true +#zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip + +## This section will override anything in the system defaults or other +## sections here. +[overrides] diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py new file mode 100644 index 00000000..4b17b853 --- /dev/null +++ b/fanficdownloader/BeautifulSoup.py @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. 
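+        #e.g. in "<a><b>text</b></a><c></c>", extracting <b> makes <a>.next
+        #point to <c> and <c>.previous point back to <a>.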
+ lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+ break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
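+        # Usage sketch: soup.find('b').findParent('p') returns the nearest
+        # enclosing <p> Tag, or None when no ancestor matches.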
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "<?%s?>" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!--%s-->" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!%s>" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
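+        # e.g. with convertHTMLEntities on, ("alt", "caf&eacute; &#38; tea")
+        # comes out as ("alt", u"café & tea").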
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
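+                    # e.g. val 'x < y & z' becomes 'x &lt; y &amp; z', while an
+                    # already-escaped '&amp;' is untouched thanks to the lookahead.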
+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
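+        # (Plain strings are accepted here as well as
+        # NavigableStrings; both get matched against self.text by
+        # _matches below.)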
+ elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "<foo><bar></foo>" actually means + "<foo><bar></bar></foo>". + + [Another possible explanation is "<foo><bar /></foo>", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda x: '<!' + x.group(1) + '>') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. 
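+    # The keys below are the ordinals of tab, LF, FF, CR and space;
+    # unicode.translate() maps each of them to None, so a text node
+    # counts as all-whitespace when translating it leaves an empty
+    # string (see endData).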
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        #print "Popping to %s" % name
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers is not None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers is None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s: %s" % (name, attrs)
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" 
% name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. 
Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            data = self.rawdata[i+9:k]
+            j = k+3
+            self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (i.e. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurrence of
+      a <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurrence
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+       but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table></tr><tr>Blah
+
+    Differing assumptions about tag nesting rules are a major source
+    of problems with the BeautifulSoup class. If BeautifulSoup is not
+    treating as nestable a tag your page author treats as nestable,
+    try ICantBelieveItsBeautifulSoup, MinimalSoup, or
+    BeautifulStoneSoup before writing your own subclass."""
+
+    def __init__(self, *args, **kwargs):
+        if not kwargs.has_key('smartQuotesTo'):
+            kwargs['smartQuotesTo'] = self.HTML_ENTITIES
+        kwargs['isHTML'] = True
+        BeautifulStoneSoup.__init__(self, *args, **kwargs)
+
+    SELF_CLOSING_TAGS = buildTagMap(None,
+                                    ('br' , 'hr', 'input', 'img', 'meta',
+                                    'spacer', 'link', 'frame', 'base', 'col'))
+
+    PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
+
+    QUOTE_TAGS = {'script' : None, 'textarea' : None}
+
+    #According to the HTML standard, each of these inline tags can
+    #contain another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
+                            'center')
+
+    #According to the HTML standard, these block tags can contain
+    #another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
+
+    #Lists can contain other lists, but there are restrictions.
+    NESTABLE_LIST_TAGS = { 'ol' : [],
+                           'ul' : [],
+                           'li' : ['ul', 'ol'],
+                           'dl' : [],
+                           'dd' : ['dl'],
+                           'dt' : ['dl'] }
+
+    #Tables can contain other tables, but there are restrictions.
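+    #For example, the entry 'td' : ['tr'] below means a new <td> only
+    #resets nesting back to the nearest <tr>, so a table nested inside
+    #a cell keeps its own row structure instead of closing the outer
+    #table's cells.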
+ NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. 
This class handles the not-so-common
+    case:  where you can't believe someone wrote what they did, but
+    it's valid HTML and BeautifulSoup screwed up by assuming it
+    wouldn't be."""
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+     ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+      'cite', 'code', 'dfn', 'kbd', 'samp', 'var', 'b')
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
+
+    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class MinimalSoup(BeautifulSoup):
+    """The MinimalSoup class is for parsing HTML that contains
+    pathologically bad markup. It makes no assumptions about tag
+    nesting, but it does know which tags are self-closing, that
+    <script> tags contain Javascript and should not be parsed, that
+    META tags may contain encoding information, and so on.
+
+    This also makes it better for subclassing than BeautifulStoneSoup
+    or BeautifulSoup."""
+
+    RESET_NESTING_TAGS = buildTagMap('noscript')
+    NESTABLE_TAGS = {}
+
+class BeautifulSOAP(BeautifulStoneSoup):
+    """This class will push a tag with only a single string child into
+    the tag's parent as an attribute. The attribute's name is the tag
+    name, and the value is the string child. An example should give
+    the flavor of the change:
+
+    <foo><bar>baz</bar></foo>
+     =>
+    <foo bar="baz"><bar>baz</bar></foo>
+
+    You can then access fooTag['bar'] instead of fooTag.barTag.string.
+
+    This is, of course, useful for scraping structures that tend to
+    use subelements instead of attributes, such as SOAP messages. Note
+    that it modifies its input, so don't print the modified version
+    out.
+
+    I'm not sure how many people really want to use this class; let me
+    know if you do. Mainly I like the name."""
+
+    def popTag(self):
+        if len(self.tagStack) > 1:
+            tag = self.tagStack[-1]
+            parent = self.tagStack[-2]
+            parent._getAttrMap()
+            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
+                isinstance(tag.contents[0], NavigableString) and
+                not parent.attrMap.has_key(tag.name)):
+                parent[tag.name] = tag.contents[0]
+        BeautifulStoneSoup.popTag(self)
+
+#Enterprise class names! It has come to our attention that some people
+#think the names of the Beautiful Soup parser classes are too silly
+#and "unprofessional" for use in enterprise screen-scraping. We feel
+#your pain! For such-minded folk, the Beautiful Soup Consortium And
+#All-Night Kosher Bakery recommends renaming this file to
+#"RobustParser.py" (or, in cases of extreme enterprisiness,
+#"RobustParserBeanInterface.class") and using the following
+#enterprise-friendly class aliases:
+class RobustXMLParser(BeautifulStoneSoup):
+    pass
+class RobustHTMLParser(BeautifulSoup):
+    pass
+class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
+    pass
+class RobustInsanelyWackAssHTMLParser(MinimalSoup):
+    pass
+class SimplifyingSOAPParser(BeautifulSOAP):
+    pass
+
+######################################################
+#
+# Bonus library: Unicode, Dammit
+#
+# This class forces XML data into a standard format (usually to UTF-8
+# or Unicode).  It is heavily based on code from Mark Pilgrim's
+# Universal Feed Parser. It does not rewrite the XML or HTML to
+# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
+# (XML) and BeautifulSoup.start_meta (HTML).
+
+# Autodetects character encodings.
+# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. +try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, orig): + """Changes a MS smart quote character to an XML or HTML + entity.""" + sub = self.MS_CHARS.get(orig) + if isinstance(sub, tuple): + if self.smartQuotesTo == 'xml': + sub = '&#x%s;' % sub[1] + else: + sub = '&%s;' % sub[0] + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + markup = re.compile("([\x80-\x9f])").sub \ + (lambda(x): self._subMSChar(x.group(1)), + markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. 
+ %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_match = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + if not xml_encoding_match and isHTML: + regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + 
codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + '\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify() diff --git a/fanficdownloader/__init__.py b/fanficdownloader/__init__.py new file mode 100644 index 00000000..40a96afc --- /dev/null +++ b/fanficdownloader/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py new file mode 100644 index 00000000..6b841870 --- /dev/null +++ b/fanficdownloader/adapters/__init__.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+import os, re, sys, glob, types
+from os.path import dirname, basename, normpath
+import logging
+import urlparse as up
+
+from .. import exceptions as exceptions
+
+## must import each adapter here.
+
+import adapter_test1
+import adapter_fanfictionnet
+import adapter_castlefansorg
+import adapter_fictionalleyorg
+import adapter_fictionpresscom
+import adapter_ficwadcom
+import adapter_fimfictionnet
+import adapter_harrypotterfanfictioncom
+import adapter_mediaminerorg
+import adapter_potionsandsnitchesnet
+import adapter_tenhawkpresentscom
+import adapter_adastrafanficcom
+import adapter_thewriterscoffeeshopcom
+import adapter_tthfanficorg
+import adapter_twilightednet
+import adapter_twiwritenet
+import adapter_whoficcom
+import adapter_siyecouk
+import adapter_archiveofourownorg
+import adapter_ficbooknet
+
+## This bit of complexity allows adapters to be added just by
+## importing them here.  It eliminates the long if/else clauses we
+## used to need to pick out the adapter.
+
+## List of registered site adapters.
+__class_list = []
+
+def imports():
+    for name, val in globals().items():
+        if isinstance(val, types.ModuleType):
+            yield val.__name__
+
+for x in imports():
+    if "fanficdownloader.adapters.adapter_" in x:
+        #print x
+        __class_list.append(sys.modules[x].getClass())
+
+def getAdapter(config,url):
+    ## fix up leading protocol.
+    fixedurl = re.sub(r"(?i)^[htp]+[:/]+","http://",url.strip())
+    if not fixedurl.startswith("http"):
+        fixedurl = "http://%s"%url
+    ## remove any trailing '#' locations.
+    fixedurl = re.sub(r"#.*$","",fixedurl)
+
+    ## remove any trailing '&' parameters--?sid=999 will be left.
+    ## that's all that any of the current adapters need or want.
+    fixedurl = re.sub(r"&.*$","",fixedurl)
+
+    parsedUrl = up.urlparse(fixedurl)
+    domain = parsedUrl.netloc.lower()
+    if( domain != parsedUrl.netloc ):
+        fixedurl = fixedurl.replace(parsedUrl.netloc,domain)
+
+    logging.debug("site:"+domain)
+    cls = getClassFor(domain)
+    if not cls:
+        logging.debug("trying site:www."+domain)
+        cls = getClassFor("www."+domain)
+        fixedurl = fixedurl.replace("http://","http://www.")
+    if cls:
+        adapter = cls(config,fixedurl) # raises InvalidStoryURL
+        return adapter
+    # No adapter found.
+    raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
+
+def getClassFor(domain):
+    for cls in __class_list:
+        if cls.matchesSite(domain):
+            return cls
diff --git a/fanficdownloader/adapters/adapter_adastrafanficcom.py b/fanficdownloader/adapters/adapter_adastrafanficcom.py
new file mode 100644
index 00000000..bf840d68
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_adastrafanficcom.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib
+import urllib2
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. 
import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','aaff')
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+        self.story.addToList("category","Star Trek")
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.adastrafanfic.com'
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            addurl = "&warning=5"
+        else:
+            addurl=""
+
+        url = self.url+'&index=1'+addurl
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
+            raise exceptions.AdultCheckRequired(self.url)
+
+        # problems with some stories, but only in calibre.  I suspect
+        # issues with different SGML parsers in python.  This is a
+        # nasty hack, but it works.
+        data = data[data.index("<body"):]
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like <i> in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## <meta name='description' content='<p>Description</p> ...' >
+        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
+        ## which is escaped HTML.  Unfortunately, we can't use it because they don't
+        ## escape (') chars in the desc, breaking the tag.
+        #meta_desc = soup.find('meta',{'name':'description'})
+        #metasoup = bs.BeautifulStoneSoup(meta_desc['content'])
+        #self.story.setMetadata('description',stripHTML(metasoup))
+
+        def defaultGetattr(d,k):
+            try:
+                return d[k]
+            except:
+                return ""
+
+        # <span class="label">Rated:</span> NC-17<br /> etc
+        labels = soup.findAll('span',{'class':'label'})
+        for labelspan in labels:
+            value = labelspan.nextSibling
+            label = labelspan.string
+
+            if 'Summary' in label:
+                ## Everything until the next span class='label'
+                svalue = ''
+                while value and not defaultGetattr(value,'class') == 'label':
+                    svalue += str(value)
+                    value = value.nextSibling
+                # sometimes poorly formatted desc (<p> w/o </p>) leads
+                # to all labels being included.
+                svalue=svalue[:svalue.find('<span class="label">')]
+                self.story.setMetadata('description',stripHTML(svalue))
+
+            if 'Rated' in label:
+                self.story.setMetadata('rating', value)
+
+            if 'Word count' in label:
+                self.story.setMetadata('numWords', value)
+
+            if 'Categories' in label:
+                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                catstext = [cat.string for cat in cats]
+                for cat in catstext:
+                    self.story.addToList('category',cat.string)
+
+            if 'Characters' in label:
+                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                charstext = [char.string for char in chars]
+                for char in charstext:
+                    self.story.addToList('characters',char.string)
+
+            if 'Genre' in label:
+                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+                genrestext = [genre.string for genre in genres]
+                self.genre = ', '.join(genrestext)
+                for genre in genrestext:
+                    self.story.addToList('genre',genre.string)
+
+            if 'Warnings' in label:
+                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+                warningstext = [warning.string for warning in warnings]
+                self.warning = ', '.join(warningstext)
+                for warning in warningstext:
+                    self.story.addToList('warnings',warning.string)
+
+            if 'Completed' in label:
+                if 'Yes' in value:
+                    self.story.setMetadata('status', 'Completed')
+                else:
+                    self.story.setMetadata('status', 'In-Progress')
+
+            if 'Published' in label:
+                self.story.setMetadata('datePublished', makeDate(value.strip(), "%d %b %Y"))
+
+            if 'Updated' in label:
+                # there's a stray [ at the end.
+                #value = value[0:-1]
+                self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%d %b %Y"))
+
+        try:
+            # Find Series name from series URL.
+            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
+            series_name = a.string
+            series_url = 'http://'+self.host+'/'+a['href']
+
+            # use BeautifulSoup HTML parser to make everything easier to find.
+            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
+            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            i=1
+            for a in storyas:
+                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
+                    self.setSeries(series_name, i)
+                    break
+                i+=1
+
+        except:
+            # I find it hard to care if the series parsing fails
+            pass
+
+
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        data = self._fetchUrl(url)
+        # problems with some stories, but only in calibre.  I suspect
+        # issues with different SGML parsers in python.  This is a
+        # nasty hack, but it works.
+        data = data[data.index("<body"):]
+
+        soup = bs.BeautifulStoneSoup(data,
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
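+        # The chapter text on this site lives in <div id="story">;
+        # pull out just that element below rather than the whole page.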
+ + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return AdAstraFanficComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_archiveofourownorg.py b/fanficdownloader/adapters/adapter_archiveofourownorg.py new file mode 100644 index 00000000..05e4d261 --- /dev/null +++ b/fanficdownloader/adapters/adapter_archiveofourownorg.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +def getClass(): + return ArchiveOfOurOwnOrgAdapter + + +class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/works/'+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ao3') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y-%b-%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.archiveofourown.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/works/123456" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/works/")+r"\d+(/chapters/\d+)?/?$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "?view_adult=true" + else: + addurl="" + + meta = self.url+addurl + url = self.url+'/navigate'+addurl + logging.debug("URL: "+meta) + + try: + data = self._fetchUrl(url) + meta = self._fetchUrl(meta) + + if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta: + raise exceptions.AdultCheckRequired(self.url) + + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.meta) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. 
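+        # 'data' holds the /navigate chapter index; 'meta' holds the
+        # work's landing page, which carries the tag and stats blocks
+        # mined below.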
+ soup = bs.BeautifulSoup(data) + metasoup = bs.BeautifulSoup(meta) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r"^/works/\w+")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"^/users/\w+/pseuds/\w+")) + self.story.setMetadata('authorId',a['href'].split('/')[2]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.text) + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'/works/'+self.story.getMetadata('storyId')+"/chapters/\d+$")) + self.story.setMetadata('numChapters',len(chapters)) + logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + for x in range(0,len(chapters)): + # just in case there's tags, like <i> in chapter titles. + chapter=chapters[x] + if len(chapters)==1: + self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+chapter['href']+addurl)) + else: + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href']+addurl)) + + + + a = metasoup.find('blockquote',{'class':'userstuff'}) + if a != None: + self.story.setMetadata('description',a.text) + + a = metasoup.find('dd',{'class':"rating tags"}) + if a != None: + self.story.setMetadata('rating',stripHTML(a.text)) + + a = metasoup.find('dd',{'class':"fandom tags"}) + fandoms = a.findAll('a',{'class':"tag"}) + fandomstext = [fandom.string for fandom in fandoms] + for fandom in fandomstext: + self.story.addToList('category',fandom.string) + + a = metasoup.find('dd',{'class':"warning tags"}) + if a != None: + warnings = a.findAll('a',{'class':"tag"}) + warningstext = [warning.string for warning in warnings] + for warning in warningstext: + if warning.string == "Author Chose Not To Use Archive Warnings": + warning.string = "No Archive Warnings Apply" + if warning.string != "No Archive Warnings Apply": + self.story.addToList('warnings',warning.string) + + a = metasoup.find('dd',{'class':"freeform tags"}) + if a != None: + genres = a.findAll('a',{'class':"tag"}) + genrestext = [genre.string for genre in genres] + for genre in genrestext: + self.story.addToList('genre',genre.string) + a = metasoup.find('dd',{'class':"category tags"}) + if a != None: + genres = a.findAll('a',{'class':"tag"}) + genrestext = [genre.string for genre in genres] + for genre in genrestext: + if genre != "Gen": + self.story.addToList('genre',genre.string) + + a = metasoup.find('dd',{'class':"character tags"}) + if a != None: + chars = a.findAll('a',{'class':"tag"}) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + a = metasoup.find('dd',{'class':"relationship tags"}) + if a != None: + chars = a.findAll('a',{'class':"tag"}) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + + stats = metasoup.find('dl',{'class':'stats'}) + dt = stats.findAll('dt') + dd = stats.findAll('dd') + for x in range(0,len(dt)): + label = dt[x].text + value = dd[x].text + + if 'Words:' in label: + self.story.setMetadata('numWords', value) + + if 'Chapters:' in label: + if value.split('/')[0] == value.split('/')[1]: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + 
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + if 'Completed' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + try: + # Find Series name from series URL. + a = metasoup.find('dd',{'class':"series"}) + b = a.find('a', href=re.compile(r"/series/\d+")) + series_name = b.string + series_url = 'http://'+self.host+'/fanfic/'+b['href'] + series_index = int(a.text.split(' ')[1]) + self.setSeries(series_name, series_index) + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) + + chapter=bs.BeautifulSoup('<div class="story"></div>') + soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')) + + headnotes = soup.find('div', {'class' : "preface group"}).find('div', {'class' : "notes module"}) + if headnotes != None: + headnotes = headnotes.find('blockquote', {'class' : "userstuff"}) + if headnotes != None: + chapter.append(bs.BeautifulSoup("<b>Author's Note:</b>")) + chapter.append(headnotes) + + chapsumm = soup.find('div', {'id' : "summary"}) + if chapsumm != None: + chapsumm = chapsumm.find('blockquote') + chapter.append(bs.BeautifulSoup("<b>Summary for the Chapter:</b>")) + chapter.append(chapsumm) + chapnotes = soup.find('div', {'id' : "notes"}) + if chapnotes != None: + chapnotes = chapnotes.find('blockquote') + if chapnotes != None: + chapter.append(bs.BeautifulSoup("<b>Notes for the Chapter:</b>")) + chapter.append(chapnotes) + + text = soup.find('div', {'class' : "userstuff module"}) + chtext = text.find('h3', {'class' : "landmark heading"}) + if chtext: + chtext.extract() + chapter.append(text) + + chapfoot = soup.find('div', {'class' : "end notes module", 'role' : "complementary"}) + if chapfoot != None: + chapfoot = chapfoot.find('blockquote') + chapter.append(bs.BeautifulSoup("<b>Notes for the Chapter:</b>")) + chapter.append(chapfoot) + + footnotes = soup.find('div', {'id' : "work_endnotes"}) + if footnotes != None: + footnotes = footnotes.find('blockquote') + chapter.append(bs.BeautifulSoup("<b>Author's Note:</b>")) + chapter.append(footnotes) + + if None == soup: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(chapter) diff --git a/fanficdownloader/adapters/adapter_castlefansorg.py b/fanficdownloader/adapters/adapter_castlefansorg.py new file mode 100644 index 00000000..4b8d40f1 --- /dev/null +++ b/fanficdownloader/adapters/adapter_castlefansorg.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+# By virtue of being recent and requiring both is_adult and user/pass,
+# adapter_fanficcastletvnet.py is the best choice for learning to
+# write adapters--especially for sites that use the eFiction system.
+# Most sites that have ".../viewstory.php?sid=123" in the story URL
+# are eFiction.
+
+# For non-eFiction sites, it can be considerably more complex, but
+# this is still a good starting point.
+
+# In general an 'adapter' needs to do these six things:
+
+# - 'Register' correctly with the downloader
+# - Site Login (if needed)
+# - 'Are you adult?' check (if needed--some do one, some the other, some both)
+# - Grab the chapter list
+# - Grab the story meta-data (some (non-eFiction) adapters have to get it from the author page)
+# - Grab the chapter texts
+
+# Search for XXX comments--that's where things are most likely to need changing.
+
+# This function is called by the downloader in all adapter_*.py files
+# in this dir to register the adapter class.  So it needs to be
+# updated to reflect the class below it.  That, plus getSiteDomain(),
+# takes care of 'Registering'.
+def getClass():
+    return CastleFansOrgAdapter # XXX
+
+# Class name has to be unique.  Our convention is camel case the
+# sitename with Adapter at the end.  www is skipped.
+class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+        self.password = ""
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/fanfic/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+        # Each adapter needs to have a unique site abbreviation.
+        self.story.setMetadata('siteabbrev','cslf') # XXX
+
+        # If all stories from the site fall into the same category,
+        # the site itself isn't likely to label them as such, so we
+        # do.
+        self.story.addToList("category","Castle") # XXX
+
+        # The date format will vary from site to site.
+        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+        self.dateformat = "%b %d, %Y" # XXX
+
+    @staticmethod # must be @staticmethod, don't remove it.
+    def getSiteDomain():
+        # The site domain.  Does have www here, if it uses it.
+        return 'castlefans.org' # XXX
+
+    def getSiteExampleURLs(self):
+        return "http://"+self.getSiteDomain()+"/fanfic/viewstory.php?sid=1234"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://"+self.getSiteDomain()+"/fanfic/viewstory.php?sid=")+r"\d+$"
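+
+    ## A minimal sketch--an assumption about the surrounding framework, not
+    ## code from this patch--of how the 'Register' contract above can be
+    ## consumed: the downloader imports each adapter_*.py module, calls its
+    ## getClass(), and picks the class whose getSiteDomain() matches the
+    ## story URL.  (getAdapterFor and adapter_modules are made-up names.)
+    ##
+    ##   import urlparse
+    ##   def getAdapterFor(config, url, adapter_modules):
+    ##       host = urlparse.urlparse(url).netloc
+    ##       for mod in adapter_modules:
+    ##           cls = mod.getClass()
+    ##           if host == cls.getSiteDomain():
+    ##               return cls(config, url)
+    ##       raise Exception("no adapter registered for %s" % url)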
+
+    ## Login seems to be reasonably standard across eFiction sites.
+    def needToLoginCheck(self, data):
+        if 'Registered Users Only' in data \
+                or 'There is no such account on our website' in data \
+                or "That password doesn't match the one in our database" in data:
+            return True
+        else:
+            return False
+
+    def performLogin(self, url):
+        params = {}
+
+        if self.password:
+            params['penname'] = self.username
+            params['password'] = self.password
+        else:
+            params['penname'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+        params['cookiecheck'] = '1'
+        params['submit'] = 'Submit'
+
+        loginUrl = 'http://' + self.getSiteDomain() + '/fanfic/user.php?action=login'
+        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+                                                              params['penname']))
+
+        d = self._fetchUrl(loginUrl, params)
+
+        if "Member Account" not in d:
+            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+                                                              params['penname']))
+            raise exceptions.FailedToLogin(url,params['penname'])
+            return False
+        else:
+            return True
+
+    ## Getting the chapter list and the meta data, plus 'is adult' checking.
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            # Weirdly, different sites use different warning numbers.
+            # If the title search below fails, there's a good chance
+            # you need a different number.  print data at that point
+            # and see what the 'click here to continue' url says.
+            addurl = "&ageconsent=ok&warning=4" # XXX
+        else:
+            addurl=""
+
+        # index=1 makes sure we see the story chapter index.  Some
+        # sites skip that for one-chapter stories.
+        url = self.url+'&index=1'+addurl
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if self.needToLoginCheck(data):
+            # need to log in for this one.
+            self.performLogin(url)
+            data = self._fetchUrl(url)
+
+        # The actual text that is used to announce you need to be an
+        # adult varies from site to site.  Again, print data before
+        # the title search to troubleshoot.
+        if "Age Consent Required" in data: # XXX
+            raise exceptions.AdultCheckRequired(self.url)
+
+        # ('adminstrators' is the site's own misspelling--we have to match it.)
+        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+        # print data
+
+        # Now go hunting for all the meta data and the chapter list.
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like <i> in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfic/'+chapter['href']+addurl))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        # eFiction sites don't help us out a lot with their metadata
+        # formatting, so it's a little ugly.
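+
+        ## A hedged illustration (the markup sample is assumed, not fetched
+        ## from the site) of why the label/value walk below works: in the
+        ## bundled BeautifulSoup, the text node right after each label span
+        ## is its nextSibling.
+        ##
+        ##   frag = bs.BeautifulSoup('<span class="label">Rated:</span> NC-17<br />')
+        ##   span = frag.find('span',{'class':'label'})
+        ##   span.string      # u'Rated:'
+        ##   span.nextSibling # u' NC-17'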
+
+        # utility method
+        def defaultGetattr(d,k):
+            try:
+                return d[k]
+            except:
+                return ""
+
+        # <span class="label">Rated:</span> NC-17<br /> etc
+        labels = soup.findAll('span',{'class':'label'})
+        for labelspan in labels:
+            value = labelspan.nextSibling
+            label = labelspan.string
+
+            if 'Summary' in label:
+                ## Everything until the next span class='label'
+                svalue = ""
+                while not defaultGetattr(value,'class') == 'label':
+                    svalue += str(value)
+                    value = value.nextSibling
+                self.story.setMetadata('description',stripHTML(svalue))
+
+            if 'Rated' in label:
+                self.story.setMetadata('rating', value)
+
+            if 'Word count' in label:
+                self.story.setMetadata('numWords', value)
+
+            if 'Categories' in label:
+                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+                catstext = [cat.string for cat in cats]
+                for cat in catstext:
+                    self.story.addToList('category',cat.string)
+
+            if 'Characters' in label:
+                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+                charstext = [char.string for char in chars]
+                for char in charstext:
+                    self.story.addToList('characters',char.string)
+
+            ## Not all sites use Genre, but there's no harm in
+            ## leaving it in.  Check to make sure the type_id number
+            ## is correct, though--it's site specific.
+            if 'Genre' in label:
+                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                genrestext = [genre.string for genre in genres]
+                self.genre = ', '.join(genrestext)
+                for genre in genrestext:
+                    self.story.addToList('genre',genre.string)
+
+            ## Not all sites use Warnings, but there's no harm in
+            ## leaving it in.  Check to make sure the type_id number
+            ## is correct, though--it's site specific.
+            if 'Warnings' in label:
+                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
+                warningstext = [warning.string for warning in warnings]
+                self.warning = ', '.join(warningstext)
+                for warning in warningstext:
+                    self.story.addToList('warnings',warning.string)
+
+            if 'Completed' in label:
+                if 'Yes' in value:
+                    self.story.setMetadata('status', 'Completed')
+                else:
+                    self.story.setMetadata('status', 'In-Progress')
+
+            if 'Published' in label:
+                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
+
+            if 'Updated' in label:
+                # there's a stray [ at the end.
+                #value = value[0:-1]
+                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+        try:
+            # Find Series name from series URL.
+            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
+            series_name = a.string
+            series_url = 'http://'+self.host+'/fanfic/'+a['href']
+
+            # use BeautifulSoup HTML parser to make everything easier to find.
+            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
+            storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+            i=1
+            for a in storyas:
+                if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
+                    self.setSeries(series_name, i)
+                    break
+                i+=1
+
+        except:
+            # I find it hard to care if the series parsing fails
+            pass
+
+    # grab the text for an individual chapter.
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        div = soup.find('div', {'id' : 'story'})
+
+        if None == div:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+        return utf8FromSoup(div)
diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py
new file mode 100644
index 00000000..73c8f635
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@@ -0,0 +1,277 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+
+from .. import BeautifulSoup as bs
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class FanFictionNetSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','ffnet')
+
+        # get storyId from url--url validation guarantees second part is storyId
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+
+        # normalized story URL.
+        self._setURL("http://"+self.getSiteDomain()\
+                         +"/s/"+self.story.getMetadata('storyId')+"/1/")
+
+        # ffnet update emails have the latest chapter URL.
+        # Frequently, when they arrive, not all the servers have the
+        # latest chapter yet and going back to chapter 1 to pull the
+        # chapter list doesn't get the latest.  So save and use the
+        # original URL given to pull chapter list & metadata.
+        self.origurl = url
+        if "http://m." in self.origurl:
+            ## accept m(mobile)url, but use www.
+            self.origurl = self.origurl.replace("http://m.","http://www.")
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.fanfiction.net'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return ['www.fanfiction.net','m.fanfiction.net']
+
+    def getSiteExampleURLs(self):
+        return "http://www.fanfiction.net/s/1234/1/ http://www.fanfiction.net/s/1234/12/ http://www.fanfiction.net/s/1234/1/Story_Title"
+
+    def getSiteURLPattern(self):
+        return r"http://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        # fetch the chapter.  From that we will get almost all the
+        # metadata and chapter list
+
+        url = self.origurl
+        logging.debug("URL: "+url)
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        try:
+            data = self._fetchUrl(url)
+            #print("\n===================\n%s\n===================\n"%data)
+            soup = bs.BeautifulSoup(data)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(url)
+            else:
+                raise e
+
+        if "Unable to locate story with id of " in data:
+            raise exceptions.StoryDoesNotExist(url)
+
+        # Sometimes it says "Chapter not found...", sometimes "Chapter text not found..."
+        if "not found. Please check to see you are not using an outdated url." in data:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
+
+        try:
+            # rather nasty way to check for a newer chapter.  ffnet has a
+            # tendency to send out update notices in email before all
+            # their servers are showing the update on the first chapter.
+            try:
+                chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
+            except:
+                chapcount = 1
+            tryurl = "http://%s/s/%s/%d/"%(self.getSiteDomain(),
+                                           self.story.getMetadata('storyId'),
+                                           chapcount+1)
+            logging.debug('=Trying newer chapter: %s' % tryurl)
+            newdata = self._fetchUrl(tryurl)
+            if "not found. Please check to see you are not using an outdated url." \
+                    not in newdata:
+                logging.debug('=======Found newer chapter: %s' % tryurl)
+                soup = bs.BeautifulSoup(newdata)
+        except:
+            pass
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"^/u/\d+"))
+        self.story.setMetadata('authorId',a['href'].split('/')[2])
+        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
+        self.story.setMetadata('author',a.string)
+
+
+        # start by finding a script towards the bottom that has a
+        # bunch of useful stuff in it.
+
+        # var storyid = 6577076;
+        # var chapter = 1;
+        # var chapters = 17;
+        # var words = 42787;
+        # var userid = 2645830;
+        # var title = 'The+Invitation';
+        # var title_t = 'The Invitation';
+        # var summary = 'Dudley Dursley would be the first to say he lived a very normal life. But what happens when he gets invited to his cousin Harry Potter\'s wedding? Will Dudley get the courage to apologize for the torture he caused all those years ago? Harry/Ginny story.';
+        # var categoryid = 224;
+        # var cat_title = 'Harry Potter';
+        # var datep = '12-21-10';
+        # var dateu = '04-06-11';
+        # var author = 'U n F a b u l o u s M e';
+
+        for script in soup.findAll('script', src=None):
+            if not script:
+                continue
+            if not script.string:
+                continue
+            if 'var storyid' in script.string:
+                for line in script.string.split('\n'):
+                    m = re.match(r"^ +var ([^ ]+) = '?(.*?)'?;\r?$",line)
+                    if m == None: continue
+                    var,value = m.groups()
+                    # remove javascript escaping from values.
+                    value = re.sub(r'\\(.)',r'\1',value)
+                    #print var,value
+                    if 'words' in var:
+                        self.story.setMetadata('numWords', value)
+                    if 'title_t' in var:
+                        self.story.setMetadata('title', value)
+                    if 'summary' in var:
+                        self.story.setMetadata('description', value)
+                    if 'datep' in var:
+                        self.story.setMetadata('datePublished',makeDate(value, '%m-%d-%y'))
+                    if 'dateu' in var:
+                        self.story.setMetadata('dateUpdated',makeDate(value, '%m-%d-%y'))
+                    if 'cat_title' in var:
+                        if "Crossover" in value:
+                            value = re.sub(r' Crossover$','',value)
+                            for c in value.split(' and '):
+                                self.story.addToList('category',c)
+                                # Screws up when the category itself
+                                # contains ' and '.  But that's rare
+                                # and the only alternative is to find
+                                # the 'Crossover' category URL and
+                                # parse that page to search for <a>
+                                # with href /crossovers/(name)/(num)/
+                                # <a href="/crossovers/Harry_Potter/224/">Harry Potter</a>
+                                # <a href="/crossovers/Naruto/1402/">Naruto</a>
+                        else:
+                            self.story.addToList('category',value)
+                break # for script in soup.findAll('script', src=None):
+
+        # Find the chapter selector
+        select = soup.find('select', { 'name' : 'chapter' } )
+
+        if select is None:
+            # no selector found, so it's a one-chapter story.
+            self.chapterUrls.append((self.story.getMetadata('title'),url))
+        else:
+            allOptions = select.findAll('option')
+            for o in allOptions:
+                url = u'http://%s/s/%s/%s/' % ( self.getSiteDomain(),
+                                                self.story.getMetadata('storyId'),
+                                                o['value'])
+                # just in case there's tags, like <i> in chapter titles.
+                title = u"%s" % o
+                title = re.sub(r'<[^>]+>','',title)
+                self.chapterUrls.append((title,url))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## Pull some additional data from html.  Find Rating and look around it.
+
+        a = soup.find('a', href='http://www.fictionratings.com/')
+        self.story.setMetadata('rating',a.string)
+
+        # used below to get correct characters.
+        metatext = a.findNext(text=re.compile(r' - Reviews:'))
+        if metatext == None: # indicates there's no Reviews, look for id: instead.
+            metatext = a.findNext(text=re.compile(r' - id:'))
+
+        # after Rating, the same bit of text containing id:123456 contains
+        # Complete--if completed.
+        if 'Complete' in a.findNext(text=re.compile(r'id:'+self.story.getMetadata('storyId'))):
+            self.story.setMetadata('status', 'Completed')
+        else:
+            self.story.setMetadata('status', 'In-Progress')
+
+        # Parse genre(s) from <meta name="description" content="..."
+        # <meta name="description" content="A Transformers/Beast Wars - Humor fanfiction with characters Prowl & Sideswipe. Story summary: Sideswipe is bored. Prowl appears to be so, too or at least, Sideswipe thinks he looks bored . So Sideswipe entertains them. After all, what's more fun than a race? Song-fic.">
+        # <meta name="description" content="Chapter 1 of a Transformers/Beast Wars - Adventure/Friendship fanfiction with characters Bumblebee. TFA: What would you do if you was being abused all you life? Follow NightRunner as she goes through her spark breaking adventure of getting away from her father..">
+        # (fp)<meta name="description" content="Chapter 1 of a Sci-Fi - Adventure/Humor fiction. Felix Max was just your regular hyperactive kid until he accidently caused his own fathers death. Now he has meta-humans trying to hunt him down with a corrupt goverment to back them up. Oh, and did I mention he has no Powers yet?.">
+        # <meta name="description" content="Chapter 1 of a Bleach - Adventure/Angst fanfiction with characters Ichigo K. & Neliel T. O./Nel. Time travel with a twist. Time can be a real bi***. Ichigo finds that fact out when he accidentally goes back in time. Is this his second chance or is fate just screwing with him. Not a crack fic.IchixNelXHime.">
+        # <meta name="description" content="Chapter 1 of a Harry Potter and Transformers - Humor/Adventure crossover fanfiction with characters: Harry P. & Ironhide. IT’s one thing to be tossed thru the Veil for something he didn’t do. It was quite another to wake in his animigus form in a world not his own. Harry just knew someone was laughing at him somewhere. Mech/Mech pairings inside..">
+        m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P<genres>.*?) )?(?:crossover )?(?:fan)?fiction(?P<chars>[ ]+with characters)?",
+                     soup.find('meta',{'name':'description'})['content'])
+        if m != None:
+            genres=m.group('genres')
+            if genres != None:
+                # Hurt/Comfort is one genre.
+                genres=re.sub('Hurt/Comfort','Hurt-Comfort',genres)
+                for g in genres.split('/'):
+                    self.story.addToList('genre',g)
+
+            if m.group('chars') != None:
+                # At this point we've proven that there's character(s).
+                # We can't reliably parse characters out of meta name="description".
+                # There's no way to tell that "with characters Ichigo K. & Neliel T. O./Nel. " ends at "Nel.", not "T."
+                # But we can pull them from the reviewstext line, now that we know about the existence of chars.
+                # reviewstext can take the form of:
+                # - English - Shinji H. - Updated: 01-13-12 - Published: 12-20-11 - id:7654123
+                # - English - Adventure/Angst - Ichigo K. & Neliel T. O./Nel - Reviews:
+                # - English - Humor/Adventure - Harry P. & Ironhide - Reviews:
+                mc = re.match(r" - (?P<lang>[^ ]+ - )(?P<genres>[^ ]+ - )? (?P<chars>.+?) - (Reviews|Updated|Published)",
+                              metatext)
+                chars = mc.group("chars")
+                for c in chars.split(' & '):
+                    self.story.addToList('characters',c)
+
+        m = re.match(r" - (?P<lang>[^ ]+)",metatext)
+        if m.group('lang') != None:
+            self.story.setMetadata('language',m.group('lang'))
+
+        return
+
+    def getChapterText(self, url):
+        logging.debug('Getting chapter text from: %s' % url)
+        time.sleep(0.5) ## ffnet (and, I assume, fpcom) tends to fail
+                        ## more if hit too fast.  This is in
+                        ## addition to whatever the
+                        ## slow_down_sleep_time setting is.
+        data = self._fetchUrl(url)
+        soup = bs.BeautifulSoup(data)
+
+        ## Remove the 'share' button.
+        sharediv = soup.find('div', {'class' : 'a2a_kit a2a_default_style'})
+        if sharediv:
+            sharediv.extract()
+        else:
+            logging.debug('share button div not found')
+
+        div = soup.find('div', {'id' : 'storytext'})
+
+        if None == div:
+            logging.debug('div id=storytext not found. data:%s'%data)
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+        return utf8FromSoup(div)
+
+def getClass():
+    return FanFictionNetSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_ficbooknet.py b/fanficdownloader/adapters/adapter_ficbooknet.py
new file mode 100644
index 00000000..c9a98bc7
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_ficbooknet.py
@@ -0,0 +1,221 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import datetime
+import logging
+import re
+import urllib2
+from .. import translit
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+
+def getClass():
+    return FicBookNetAdapter
+
+
+class FicBookNetAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+
+        self.decode = ["utf8",
+                       "Windows-1252"] # 1252 is a superset of iso-8859-1.
+                                       # Most sites that claim to be
+                                       # iso-8859-1 (and some that claim to be
+                                       # utf8) are really windows-1252.
+        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+        self.password = ""
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees second part is storyId
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+        self._setURL('http://' + self.getSiteDomain() + '/readfic/'+self.story.getMetadata('storyId'))
+
+        # Each adapter needs to have a unique site abbreviation.
+        self.story.setMetadata('siteabbrev','fbn')
+
+        # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %m %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.ficbook.net' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/readfic/12345" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/readfic/")+r"\d+" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + url=self.url + logging.debug("URL: "+url) + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # Now go hunting for all the meta data and the chapter list. + + table = soup.find('td',{'width':'50%'}) + + ## Title + a = soup.find('h1') + self.story.setMetadata('title',a.string) + logging.debug("Title: (%s)"%self.story.getMetadata('title')) + + # Find authorid and URL from... author url. + a = table.find('a') + self.story.setMetadata('authorId',a.text) # Author's name is unique + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.text) + logging.debug("Author: (%s)"%self.story.getMetadata('author')) + + # Find the chapters: + chapters = soup.find('div', {'class' : 'part_list'}) + if chapters != None: + chapters=chapters.findAll('a', href=re.compile(r'/readfic/'+self.story.getMetadata('storyId')+"/\d+#part_content$")) + self.story.setMetadata('numChapters',len(chapters)) + for x in range(0,len(chapters)): + chapter=chapters[x] + churl='http://'+self.host+chapter['href'] + self.chapterUrls.append((stripHTML(chapter),churl)) + if x == 0: + pubdate = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) + if x == len(chapters)-1: + update = translit.translit(stripHTML(bs.BeautifulSoup(self._fetchUrl(churl)).find('div', {'class' : 'part_added'}).find('span'))) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + self.story.setMetadata('numChapters',1) + pubdate=translit.translit(stripHTML(soup.find('div', {'class' : 'part_added'}).find('span'))) + update=pubdate + + logging.debug("numChapters: (%s)"%self.story.getMetadata('numChapters')) + + if not ',' in pubdate: + pubdate=datetime.date.today().strftime(self.dateformat) + if not ',' in update: + update=datetime.date.today().strftime(self.dateformat) + pubdate=pubdate.split(',')[0] + update=update.split(',')[0] + + fullmon = {"yanvarya":"01", "января":"01", + "fievralya":"02", "февраля":"02", + "marta":"03", "марта":"03", + "aprielya":"04", "апреля":"04", + "maya":"05", "мая":"05", + "iyunya":"06", "июня":"06", + "iyulya":"07", "июля":"07", + "avghusta":"08", "августа":"08", + "sentyabrya":"09", "сентября":"09", + "oktyabrya":"10", "октября":"10", + "noyabrya":"11", "ноября":"11", + "diekabrya":"12", "декабря":"12" } + for (name,num) in fullmon.items(): + if name in pubdate: + pubdate = pubdate.replace(name,num) + if name in update: + update = update.replace(name,num) + + self.story.setMetadata('dateUpdated', makeDate(update, self.dateformat)) + self.story.setMetadata('datePublished', makeDate(pubdate, self.dateformat)) + self.story.setMetadata('language','Russian') + + pr=soup.find('a', href=re.compile(r'/printfic/\w+')) + 
pr='http://'+self.host+pr['href']
+        pr = bs.BeautifulSoup(self._fetchUrl(pr))
+        pr=pr.findAll('div', {'class' : 'part_text'})
+        i=0
+        for part in pr:
+            i=i+len(stripHTML(part).split(' '))
+        self.story.setMetadata('numWords', str(i))
+
+        i=0
+        fandoms = table.findAll('a', href=re.compile(r'/fanfiction/\w+'))
+        for fandom in fandoms:
+            self.story.addToList('category',fandom.string)
+            i=i+1
+        if i > 1:
+            self.story.addToList('genre', 'Кроссовер')
+
+        meta=table.findAll('a', href=re.compile(r'/ratings/'))
+        i=0
+        for m in meta:
+            if i == 0:
+                self.story.setMetadata('rating', m.find('b').text)
+                i=1
+            elif i == 1:
+                if not "," in m.nextSibling:
+                    i=2
+                self.story.addToList('genre', m.find('b').text)
+            elif i == 2:
+                self.story.addToList('warnings', m.find('b').text)
+
+
+        if table.find('span', {'style' : 'color: green'}):
+            self.story.setMetadata('status', 'Completed')
+        else:
+            self.story.setMetadata('status', 'In-Progress')
+
+
+        tags = table.findAll('b')
+        for tag in tags:
+            label = translit.translit(tag.text)
+            if 'Piersonazhi:' in label or 'Персонажи:' in label:
+                chars=tag.nextSibling.string.split(', ')
+                for char in chars:
+                    self.story.addToList('characters',char)
+                break
+
+        summary=soup.find('span', {'class' : 'urlize'})
+        self.story.setMetadata('description', summary.text)
+
+    # grab the text for an individual chapter.
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        chapter = soup.find('div', {'class' : 'public_beta'})
+        if chapter == None:
+            chapter = soup.find('div', {'class' : 'public_beta_disabled'})
+
+        if None == chapter:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+        return utf8FromSoup(chapter)
diff --git a/fanficdownloader/adapters/adapter_fictionalleyorg.py b/fanficdownloader/adapters/adapter_fictionalleyorg.py
new file mode 100644
index 00000000..fba44110
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fictionalleyorg.py
@@ -0,0 +1,230 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib
+import urllib2
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fa')
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+ self.story.addToList("category","Harry Potter") + self.is_adult=False + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('authorId',m.group('auth')) + self.story.setMetadata('storyId',m.group('id')) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + # normalized story URL. + self._setURL(url) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + @staticmethod + def getSiteDomain(): + return 'www.fictionalley.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/authors/drt/DA.html http://"+self.getSiteDomain()+"/authors/drt/JOTP01a.html" + + def getSiteURLPattern(self): + # http://www.fictionalley.org/authors/drt/DA.html + # http://www.fictionalley.org/authors/drt/JOTP01a.html + return re.escape("http://"+self.getSiteDomain())+"/authors/(?P<auth>[a-zA-Z0-9_]+)/(?P<id>[a-zA-Z0-9_]+)\.html" + + def _postFetchWithIAmOld(self,url): + if self.is_adult or self.getConfig("is_adult"): + params={'iamold':'Yes', + 'action':'ageanswer'} + logging.info("Attempting to get cookie for %s" % url) + ## posting on list doesn't work, but doesn't hurt, either. + data = self._postUrl(url,params) + else: + data = self._fetchUrl(url) + return data + + def extractChapterUrlsAndMetadata(self): + + ## could be either chapter list page or one-shot text page. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._postFetchWithIAmOld(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + chapterdata = data + # If chapter list page, get the first chapter to look for adult check + chapterlinklist = soup.findAll('a',{'class':'chapterlink'}) + if chapterlinklist: + chapterdata = self._postFetchWithIAmOld(chapterlinklist[0]['href']) + + if "Are you over seventeen years old" in chapterdata: + raise exceptions.AdultCheckRequired(self.url) + + if not chapterlinklist: + # no chapter list, chapter URL: change to list link. + # second a tag inside div breadcrumbs + storya = soup.find('div',{'class':'breadcrumbs'}).findAll('a')[1] + self._setURL(storya['href']) + url=self.url + logging.debug("Normalizing to URL: "+url) + ## title's right there... + self.story.setMetadata('title',storya.string) + data = self._fetchUrl(url) + soup = bs.BeautifulSoup(data) + chapterlinklist = soup.findAll('a',{'class':'chapterlink'}) + else: + ## still need title from somewhere. If chapterlinklist, + ## then chapterdata contains a chapter, find title the + ## same way. + chapsoup = bs.BeautifulSoup(chapterdata) + storya = chapsoup.find('div',{'class':'breadcrumbs'}).findAll('a')[1] + self.story.setMetadata('title',storya.string) + del chapsoup + + del chapterdata + + ## authorid already set. + ## <h1 class="title" align="center">Just Off The Platform II by <a href="http://www.fictionalley.org/authors/drt/">DrT</a></h1> + authora=soup.find('h1',{'class':'title'}).find('a') + self.story.setMetadata('author',authora.string) + self.story.setMetadata('authorUrl',authora['href']) + + if len(chapterlinklist) == 1: + self.chapterUrls.append((self.story.getMetadata('title'),chapterlinklist[0]['href'])) + else: + # Find the chapters: + for chapter in chapterlinklist: + # just in case there's tags, like <i> in chapter titles. 
+                self.chapterUrls.append((stripHTML(chapter),chapter['href']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## Go scrape the rest of the metadata from the author's page.
+        data = self._fetchUrl(self.story.getMetadata('authorUrl'))
+        soup = bs.BeautifulSoup(data)
+
+        # <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
+        # [Rid] The Magical Hottiez</a> by <a class = "pen_name" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/">Aafro Man Ziegod</a> </small></dt>
+        # <dd><small class = "storyinfo"><a href = "http://www.fictionalley.org/ratings.html" target = "_new">Rating:</a> PG-13 - Spoilers: PS/SS, CoS, PoA, GoF, QTTA, FB - 4264 hits - 5060 words<br />
+        # Genre: Humor, Romance - Main character(s): None - Ships: None - Era: Multiple Eras<br /></small>
+        # Chaos ensues after Witch Weekly, seeking to increase readers, decides to create a boyband out of five seemingly talentless wizards: Harry Potter, Draco Malfoy, Ron Weasley, Neville Longbottom, and Oliver "Toss Your Knickers Here" Wood.<br />
+        # <small class = "storyinfo">Published: June 3, 2002 (between Goblet of Fire and Order of Phoenix) - Updated: June 3, 2002</small>
+        # </dd></dl>
+
+        storya = soup.find('a',{'href':self.story.getMetadata('storyUrl')})
+        storydd = storya.findNext('dd')
+
+        # Rating: PG - Spoilers: None - 2525 hits - 736 words
+        # Genre: Humor - Main character(s): H, R - Ships: None - Era: Multiple Eras
+        # Harry and Ron are back at it again! They reeeeeeally don't want to be back, because they know what's awaiting them. "VH1 Goes Inside..." is back! Why? 'Cos there are soooo many more couples left to pick on.
+        # Published: September 25, 2004 (between Order of Phoenix and Half-Blood Prince) - Updated: September 25, 2004
+
+        ## change to text and regexp find.
+        metastr = stripHTML(storydd).replace('\n',' ').replace('\t',' ')
+
+        m = re.match(r".*?Rating: (.+?) -.*?",metastr)
+        if m:
+            self.story.setMetadata('rating', m.group(1))
+
+        m = re.match(r".*?Genre: (.+?) -.*?",metastr)
+        if m:
+            for g in m.group(1).split(','):
+                self.story.addToList('genre',g)
+
+        m = re.match(r".*?Published: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
+        if m:
+            self.story.setMetadata('datePublished',makeDate(m.group(1), "%B %d, %Y"))
+
+        m = re.match(r".*?Updated: ([a-zA-Z]+ \d\d?, \d\d\d\d).*?",metastr)
+        if m:
+            self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%B %d, %Y"))
+
+        m = re.match(r".*? (\d+) words Genre.*?",metastr)
+        if m:
+            self.story.setMetadata('numWords', m.group(1))
+
+        for small in storydd.findAll('small'):
+            small.extract() ## removes the <small> tags, leaving only the summary.
+        self.story.setMetadata('description',stripHTML(storydd))
+
+        return
+
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        data = self._fetchUrl(url)
+        # Find <!-- headerend --> & <!-- footerstart --> and replace
+        # them with a matching pair of made-up tags for easier parsing.
+        # Yes, it's an evil kludge, but what can ya do?  Using
+        # something other than div prevents soup from pairing our tag
+        # with the poor html inside the story text.
+        data = data.replace('<!-- headerend -->','<crazytagstringnobodywouldstumbleonaccidently id="storytext">').replace('<!-- footerstart -->','</crazytagstringnobodywouldstumbleonaccidently>')
+
+        # problems with some stories confusing Soup.  This is a nasty
+        # hack, but it works.
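+        # A hedged, self-contained illustration of the marker kludge above
+        # (the page content here is made up):
+        #
+        #   page = 'junk<!-- headerend -->STORY<!-- footerstart -->junk'
+        #   page = page.replace('<!-- headerend -->',
+        #       '<crazytagstringnobodywouldstumbleonaccidently id="storytext">')
+        #   page = page.replace('<!-- footerstart -->',
+        #       '</crazytagstringnobodywouldstumbleonaccidently>')
+        #
+        # The slice below then drops everything before the opening
+        # replacement tag, so Soup never sees the markup that was confusing
+        # it.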
+        data = data[data.index("<crazytagstringnobodywouldstumbleonaccidently"):]
+
+        soup = bs.BeautifulStoneSoup(data,
+                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+        body = soup.findAll('body') ## some stories use nested body
+                                    ## tags, in which case we don't
+                                    ## need crazytagstringnobodywouldstumbleonaccidently
+                                    ## and use the second one instead.
+        if len(body)>1:
+            text = body[1]
+            text.name='div' # force to be a div to avoid multiple body tags.
+        else:
+            text = soup.find('crazytagstringnobodywouldstumbleonaccidently', {'id' : 'storytext'})
+            text.name='div' # change to div tag.
+
+        if not data or not text:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+        return utf8FromSoup(text)
+
+def getClass():
+    return FictionAlleyOrgSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_fictionpresscom.py b/fanficdownloader/adapters/adapter_fictionpresscom.py
new file mode 100644
index 00000000..76b2353a
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fictionpresscom.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+
+## They're from the same people and pretty much identical.
+from adapter_fanfictionnet import FanFictionNetSiteAdapter
+
+class FictionPressComSiteAdapter(FanFictionNetSiteAdapter):
+
+    def __init__(self, config, url):
+        FanFictionNetSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fpcom')
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.fictionpress.com'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return ['www.fictionpress.com','m.fictionpress.com']
+
+    def getSiteExampleURLs(self):
+        return "http://www.fictionpress.com/s/1234/1/ http://www.fictionpress.com/s/1234/12/ http://www.fictionpress.com/s/1234/1/Story_Title"
+
+    def getSiteURLPattern(self):
+        return r"http://(www|m)?\.fictionpress\.com/s/\d+(/\d+)?(/|/[a-zA-Z0-9_-]+)?/?$"
+
+def getClass():
+    return FictionPressComSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_ficwadcom.py b/fanficdownloader/adapters/adapter_ficwadcom.py
new file mode 100644
index 00000000..e556041b
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_ficwadcom.py
@@ -0,0 +1,216 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+import httplib, urllib
+
+from .. import BeautifulSoup as bs
+from .. import exceptions as exceptions
+from ..htmlcleanup import stripHTML
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class FicwadComSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fw')
+
+        # get storyId from url--url validation guarantees second part is storyId
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+
+        self.username = "NoneGiven"
+        self.password = ""
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.ficwad.com'
+
+    def getSiteExampleURLs(self):
+        return "http://www.ficwad.com/story/137169"
+
+    def getSiteURLPattern(self):
+        return re.escape(r"http://"+self.getSiteDomain())+r"/story/\d+?$"
+
+    def performLogin(self,url):
+        params = {}
+
+        if self.password:
+            params['username'] = self.username
+            params['password'] = self.password
+        else:
+            params['username'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+
+        loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
+        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+                                                              params['username']))
+        d = self._postUrl(loginUrl,params)
+
+        if "Login attempt failed..." in d:
+            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+                                                              params['username']))
+            raise exceptions.FailedToLogin(url,params['username'])
+            return False
+        else:
+            return True
+
+    def extractChapterUrlsAndMetadata(self):
+
+        # fetch the chapter.  From that we will get almost all the
+        # metadata and chapter list
+
+        url = self.url
+        logging.debug("URL: "+url)
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        try:
+            soup = bs.BeautifulSoup(self._fetchUrl(url))
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        h3 = soup.find('h3')
+        storya = h3.find('a',href=re.compile(r"^/story/\d+$"))
+        if storya: # if there's a story link in the h3 header, this is a chapter page.
+            # normalize story URL on chapter list.
+            self.story.setMetadata('storyId',storya['href'].split('/',)[2])
+            url = "http://"+self.getSiteDomain()+storya['href']
+            logging.debug("Normalizing to URL: "+url)
+            self._setURL(url)
+            try:
+                soup = bs.BeautifulSoup(self._fetchUrl(url))
+            except urllib2.HTTPError, e:
+                if e.code == 404:
+                    raise exceptions.StoryDoesNotExist(self.url)
+                else:
+                    raise e
+
+        # if blocked, attempt login.
+        if soup.find("li",{"class":"blocked"}):
+            if self.performLogin(url): # performLogin raises
+                                       # FailedToLogin if it fails.
+                soup = bs.BeautifulSoup(self._fetchUrl(url))
+
+        # title - first h4 tag will be title.
+        titleh4 = soup.find('h4')
+        self.story.setMetadata('title', titleh4.a.string)
+
+        # Find authorid and URL from... author url.
+ a = soup.find('a', href=re.compile(r"^/author/\d+")) + self.story.setMetadata('authorId',a['href'].split('/')[2]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + # description + storydiv = soup.find("div",{"id":"story"}) + self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string) + + # most of the meta data is here: + metap = storydiv.find("p",{"class":"meta"}) + self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string) + + # warnings + # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span> + spanreq = metap.find("span",{"class":"req"}) + if spanreq: # can be no warnings. + for a in spanreq.findAll("a"): + self.story.addToList('warnings',a['title']) + + ## perhaps not the most efficient way to parse this, using + ## regexps for each rather than something more complex, but + ## IMO, it's more readable and amenable to change. + metastr = stripHTML(str(metap)).replace('\n',' ').replace('\t',' ') + #print "metap: (%s)"%metastr + + m = re.match(r".*?Rating: (.+?) -.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genres: (.+?) -.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Characters: (.*?) -.*?",metastr) + if m: + for g in m.group(1).split(','): + if g: + self.story.addToList('characters',g) + + m = re.match(r".*?Published: ([0-9/]+?) -.*?",metastr) + if m: + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y/%m/%d")) + + # Updated can have more than one space after it. <shrug> + m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metastr) + if m: + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y/%m/%d")) + + m = re.match(r".*? - ([0-9/]+?) words.*?",metastr) + if m: + self.story.setMetadata('numWords',m.group(1)) + + if metastr.endswith("Complete"): + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + # get the chapter list first this time because that's how we + # detect the need to login. + storylistul = soup.find('ul',{'id':'storylist'}) + if not storylistul: + # no list found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + chapterlistlis = storylistul.findAll('li') + for chapterli in chapterlistlis: + if "blocked" in chapterli['class']: + # paranoia check. We should already be logged in by now. + raise exceptions.FailedToLogin(url,self.username) + else: + #print "chapterli.h4.a (%s)"%chapterli.h4.a + self.chapterUrls.append((chapterli.h4.a.string, + u'http://%s%s'%(self.getSiteDomain(), + chapterli.h4.a['href']))) + #print "self.chapterUrls:%s"%self.chapterUrls + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + return + + + def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'storytext'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url)
+
+        return utf8FromSoup(span)
+
+def getClass():
+    return FicwadComSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py
new file mode 100644
index 00000000..b829beac
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_fimfictionnet.py
@@ -0,0 +1,175 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+import cookielib as cl
+import datetime
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+def getClass():
+    return FimFictionNetSiteAdapter
+
+class FimFictionNetSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','fimficnet')
+        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
+        self._setURL("http://"+self.getSiteDomain()+"/story/"+self.story.getMetadata('storyId')+"/")
+        self.is_adult = False
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.fimfiction.net'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        # mobile.fimfiction.com isn't actually a valid domain, but we can still get the story id from such URLs anyway
+        return ['www.fimfiction.net','mobile.fimfiction.net', 'www.fimfiction.com', 'mobile.fimfiction.com']
+
+    def getSiteExampleURLs(self):
+        return "http://www.fimfiction.net/story/1234/story-title-here http://www.fimfiction.net/story/1234/ http://www.fimfiction.com/story/1234/1/ http://mobile.fimfiction.net/story/1234/1/story-title-here/chapter-title-here"
+
+    def getSiteURLPattern(self):
+        return r"http://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*"
+
+    def extractChapterUrlsAndMetadata(self):
+
+        if self.is_adult or self.getConfig("is_adult"):
+            cookieproc = urllib2.HTTPCookieProcessor()
+            cookie = cl.Cookie(version=0, name='view_mature', value='true',
+                               port=None, port_specified=False,
+                               domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False,
+                               path='/story', path_specified=True,
+                               secure=False,
+                               expires=time.time()+10000,
+                               discard=False,
+                               comment=None,
+                               comment_url=None,
+                               rest={'HttpOnly': None},
+                               rfc2109=False)
+            cookieproc.cookiejar.set_cookie(cookie)
+            self.opener = urllib2.build_opener(cookieproc)
+
+        try:
+            data = self._fetchUrl(self.url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if "Warning: mysql_fetch_array(): supplied argument is not a valid MySQL result resource" in data:
+            raise exceptions.StoryDoesNotExist(self.url)
+
+        if "This story has been marked as having adult content." in data:
+            raise exceptions.AdultCheckRequired(self.url)
+
+        soup = bs.BeautifulSoup(data).find("div", {"class":"content_box post_content_box"})
+
+        titleheader = soup.find("h2")
+        title = titleheader.find("a", href=re.compile(r'^/story/')).text
+        author = titleheader.find("a", href=re.compile(r'^/user/')).text
+        self.story.setMetadata("title", title)
+        self.story.setMetadata("author", author)
+        self.story.setMetadata("authorId", author) # The author's name will be unique
+        self.story.setMetadata("authorUrl", "http://%s/user/%s" % (self.getSiteDomain(),author))
+
+        chapterDates = []
+
+        for chapter in soup.findAll("a", {"class":"chapter_link"}):
+            chapterDates.append(chapter.span.extract().text.strip("()"))
+            self.chapterUrls.append((chapter.text.strip(), "http://"+self.getSiteDomain() + chapter['href']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        for character in [character_icon['title'] for character_icon in soup.findAll("a", {"class":"character_icon"})]:
+            self.story.addToList("characters", character)
+        for category in [category.text for category in soup.find("div", {"class":"categories"}).findAll("a")]:
+            self.story.addToList("genre", category)
+        self.story.addToList("category", "My Little Pony")
+
+
+        # The very last list element in the list of chapters contains the status, rating and word count e.g.:
+        #
+        # <li>
+        #     Incomplete | Rating:
+        #     <span style="color:#c78238;">Teen</span>
+        #     <div class="word_count"><b>5,203</b>words total</div>
+        # </li>
+        #
+
+        status_bar = soup.findAll('li')[-1]
+        # In the case of fimfiction.net, possible statuses are 'Complete', 'Incomplete', 'On Hiatus' and 'Cancelled'.
+        # For the sake of bringing it in line with the other adapters, 'Incomplete' and 'On Hiatus' become 'In-Progress'
+        # and 'Complete' becomes 'Completed'.  'Cancelled' seems an important enough (not to mention more strictly true)
+        # status to leave unchanged.
+        status = status_bar.text.split("|")[0].strip().replace("Incomplete", "In-Progress").replace("On Hiatus", "In-Progress").replace("Complete", "Completed")
+        self.story.setMetadata('status', status)
+        self.story.setMetadata('rating', status_bar.span.text)
+        # This way is less elegant, perhaps, but more robust in face of format changes.
+        numWords = status_bar.find("div",{"class":"word_count"}).b.text
+        self.story.setMetadata('numWords', numWords)
+
+        description_soup = soup.find("div", {"class":"description"})
+        # Sometimes the description has an expanding element.
+        # This removes the ellipsis and the expand button.
+        try:
+            description_soup.find('span', {"id":re.compile(r"description_more_elipses_\d+")}).extract() # Web designer can't spell 'ellipsis'
+            description_soup.find('a', {"class":"more"}).extract()
+        except:
+            pass
+        self.story.setMetadata('description', description_soup.text)
+
+        # Unfortunately, nowhere on the page is the year mentioned.
+        # Best effort to deal with this:
+        # Use this year; if that gives a date in the future, subtract one year.
+        # Their earliest story is Jun, so they'll probably change the date
+        # around then.
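+        # A worked example of the rollover rule (the dates are assumed,
+        # purely for illustration): if today is 10 Feb 2012 and the chapter
+        # list shows "5 Jun", makeDate("2012Jun5","%Y%b%d") lands in the
+        # future, so the story must really be from 5 Jun 2011 and we
+        # subtract a year.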
+
+        now = datetime.datetime.now()
+
+        # Get the date of creation from the first chapter
+        datePublished_text = chapterDates[0]
+        day, month = datePublished_text.split()
+        day = re.sub(r"[^\d.]+", '', day)
+        datePublished = makeDate("%s%s%s"%(now.year,month,day), "%Y%b%d")
+        if datePublished > now:
+            datePublished = datePublished.replace(year=now.year-1)
+        self.story.setMetadata("datePublished", datePublished)
+        dateUpdated_soup = bs.BeautifulSoup(data).find("div", {"class":"calendar"})
+        dateUpdated_soup.find('span').extract()
+        dateUpdated = makeDate("%s%s"%(now.year,dateUpdated_soup.text), "%Y%b%d")
+        if dateUpdated > now:
+            dateUpdated = dateUpdated.replace(year=now.year-1)
+        self.story.setMetadata("dateUpdated", dateUpdated)
+
+    def getChapterText(self, url):
+        logging.debug('Getting chapter text from: %s' % url)
+        soup = bs.BeautifulSoup(self._fetchUrl(url),selfClosingTags=('br','hr')).find('div', {'id' : 'chapter_container'})
+        if soup == None:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+        return utf8FromSoup(soup)
+
diff --git a/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py
new file mode 100644
index 00000000..5682b9e6
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib
+import urllib2
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','hp')
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+        self.story.addToList("category","Harry Potter")
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only psid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        # normalized story URL.
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.harrypotterfanfiction.com' + + @classmethod + def getAcceptDomains(cls): + return ['www.harrypotterfanfiction.com','harrypotterfanfiction.com'] + + def getSiteExampleURLs(self): + return "http://www.harrypotterfanfiction.com/viewstory.php?psid=1234 http://harrypotterfanfiction.com/viewstory.php?psid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId'))) + self.story.setMetadata('title',a.string) + ## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995' + if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")): + raise exceptions.AdultCheckRequired(self.url) + + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?showuid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + ## hpcom doesn't give us total words--but it does give + ## us words/chapter. I'd rather add than fetch and + ## parse another page. + words=0 + for tr in soup.find('table',{'class':'text'}).findAll('tr'): + tdstr = tr.findAll('td')[2].string + if tdstr and tdstr.isdigit(): + words+=int(tdstr) + self.story.setMetadata('numWords',str(words)) + + # Find the chapters: + tablelist = soup.find('table',{'class':'text'}) + for chapter in tablelist.findAll('a', href=re.compile(r'\?chapterid=\d+')): + #javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?chapterid=433441&i=1' + # just in case there's tags, like <i> in chapter titles. + chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href']) + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php'+chpt)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Finding the metadata is a bit of a pain. Desc is the only thing this color. + desctable= soup.find('table',{'bgcolor':'#f0e8e8'}) + self.story.setMetadata('description',stripHTML(desctable)) + + ## Finding the metadata is a bit of a pain. 
Most of the meta + ## data is in a center.table without a bgcolor. + for center in soup.findAll('center'): + table = center.find('table',{'bgcolor':None}) + if table: + metastr = stripHTML(str(table)).replace('\n',' ').replace('\t',' ') + # Rating: 12+ Story Reviews: 3 + # Chapters: 3 + # Characters: Andromeda, Ted, Bellatrix, R. Lestrange, Lucius, Narcissa, OC + # Genre(s): Fluff, Romance, Young Adult Era: OtherPairings: Other Pairing, Lucius/Narcissa + # Status: Completed + # First Published: 2010.09.02 + # Last Published Chapter: 2010.09.28 + # Last Updated: 2010.09.28 + # Favorite Story Of: 1 users + # Warnings: Scenes of a Mild Sexual Nature + + m = re.match(r".*?Status: Completed.*?",metastr) + if m: + self.story.setMetadata('status','Completed') + else: + self.story.setMetadata('status','In-Progress') + + m = re.match(r".*?Rating: (.+?) Story Reviews.*?",metastr) + if m: + self.story.setMetadata('rating', m.group(1)) + + m = re.match(r".*?Genre\(s\): (.+?) Era.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('genre',g) + + m = re.match(r".*?Characters: (.+?) Genre.*?",metastr) + if m: + for g in m.group(1).split(','): + self.story.addToList('characters',g) + + m = re.match(r".*?Warnings: (.+).*?",metastr) + if m: + for w in m.group(1).split(','): + if w != 'Now Warnings': + self.story.addToList('warnings',w) + + m = re.match(r".*?First Published: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y.%m.%d")) + + # Updated can have more than one space after it. <shrug> + m = re.match(r".*?Last Updated: ([0-9\.]+).*?",metastr) + if m: + self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y.%m.%d")) + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + ## most adapters use BeautifulStoneSoup here, but non-Stone + ## allows nested div tags. + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'fluidtext'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(div) + +def getClass(): + return HarryPotterFanFictionComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_mediaminerorg.py b/fanficdownloader/adapters/adapter_mediaminerorg.py new file mode 100644 index 00000000..23c72e30 --- /dev/null +++ b/fanficdownloader/adapters/adapter_mediaminerorg.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. 
import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class MediaMinerOrgSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','mm') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + # get storyId from url--url validation guarantees query correct + m = re.match(self.getSiteURLPattern(),url) + if m: + self.story.setMetadata('storyId',m.group('id')) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId')) + else: + raise exceptions.InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + @staticmethod + def getSiteDomain(): + return 'www.mediaminer.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/fanfic/view_st.php/123456 http://"+self.getSiteDomain()+"/fanfic/view_ch.php/1234123/123444#fic_c" + + def getSiteURLPattern(self): + ## http://www.mediaminer.org/fanfic/view_st.php/76882 + ## http://www.mediaminer.org/fanfic/view_ch.php/167618/594087#fic_c + return re.escape("http://"+self.getSiteDomain())+\ + "/fanfic/view_(st|ch)\.php/"+r"(?P<id>\d+)(/\d+(#fic_c)?)?$" + + def extractChapterUrlsAndMetadata(self): + + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + # [ A - All Readers ], strip '[' ']' + ## Above title because we remove the smtxt font to get title. + smtxt = soup.find("font",{"class":"smtxt"}) + if not smtxt: + raise exceptions.StoryDoesNotExist(self.url) + rating = smtxt.string[1:-1] + self.story.setMetadata('rating',rating) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"/fanfic/src.php/u/\d+")) + self.story.setMetadata('authorId',a['href'].split('/')[-1]) + self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('author',a.string) + + ## Title - Good grief. Title varies by chaptered, 1chapter and 'type=one shot'--and even 'one-shot's can have titled chapter. + ## But, if colspan=2, there's no chapter title. + ## <td class="ffh">Atmosphere: Chapter 1</b> <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td colspan=2 class="ffh">Hearts of Ice <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td colspan=2 class="ffh">Suzaku no Princess <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td class="ffh">The Kraut, The Bartender, and The Drunkard: Chapter 1</b> <font class="smtxt">[ P - Pre-Teen ]</font></td> + ## <td class="ffh">Betrayal and Justice: A Cold Heart</b> <font size="-1">( Chapter 1 )</font> <font class="smtxt">[ A - All Readers ]</font></td> + ## <td class="ffh">Question and Answer: Question and Answer</b> <font size="-1">( One-Shot )</font> <font class="smtxt">[ A - All Readers ]</font></td> + title = soup.find('td',{'class':'ffh'}) + for font in title.findAll('font'): + font.extract() # removes 'font' tags from inside the td. + if title.has_key('colspan'): + titlet = title.text + else: + ## No colspan, it's part chapter title--even if it's a one-shot. 
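+ ## Worked example, using the samples above: "Atmosphere: Chapter 1"
+ ## .split(':')[:-1] -> ['Atmosphere'], and ':'.join() preserves any
+ ## colons inside the real title: "A: B: Chapter 1" -> "A: B".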
+ titlet = ':'.join(title.text.split(':')[:-1]) # strip trailing 'Chapter X' or chapter title
+ self.story.setMetadata('title',titlet)
+ ## The story title is difficult to reliably parse from the
+ ## story pages. Getting it from the author page is reliable,
+ ## but it costs another fetch.
+ # authsoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl')))
+ # titlea = authsoup.find('a',{'href':'/fanfic/view_st.php/'+self.story.getMetadata('storyId')})
+ # self.story.setMetadata('title',titlea.text)
+
+ # save date from first for later.
+ firstdate=None
+
+ # Find the chapters
+ select = soup.find('select',{'name':'cid'})
+ if not select:
+ self.chapterUrls.append(( self.story.getMetadata('title'),self.url))
+ else:
+ for option in select.findAll("option"):
+ chapter = stripHTML(option.string)
+ ## chapter can be: Chapter 7 [Jan 23, 2011]
+ ## or: Vigilant Moonlight ( Chapter 1 ) [Jan 30, 2004]
+ ## or even: Prologue ( Prologue ) [Jul 31, 2010]
+ m = re.match(r'^(.*?) (\( .*? \) )?\[(.*?)\]$',chapter)
+ chapter = m.group(1)
+ # save date from first for later.
+ if not firstdate:
+ firstdate = m.group(3)
+ self.chapterUrls.append((chapter,'http://'+self.host+'/fanfic/view_ch.php/'+self.story.getMetadata('storyId')+'/'+option['value']))
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ # category
+ # <a href="/fanfic/src.php/a/567">Ranma 1/2</a>
+ for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/a/")):
+ self.story.addToList('category',a.string)
+
+ # genre
+ # e.g. <a href="/fanfic/src.php/g/...">Genre name</a>
+ for a in soup.findAll('a',href=re.compile(r"^/fanfic/src.php/g/")):
+ self.story.addToList('genre',a.string)
+
+ # if firstdate is set, the metadata block below will only have the last-updated date.
+ if firstdate:
+ self.story.setMetadata('datePublished', makeDate(firstdate, "%b %d, %Y"))
+ # Everything else is in <tr bgcolor="#EEEED4">
+
+ metastr = stripHTML(soup.find("tr",{"bgcolor":"#EEEED4"})).replace('\n',' ').replace('\r',' ').replace('\t',' ')
+ # Latest Revision: August 03, 2010
+ m = re.match(r".*?(?:Latest Revision|Uploaded On): ([a-zA-Z]+ \d\d, \d\d\d\d)",metastr)
+ if m:
+ self.story.setMetadata('dateUpdated', makeDate(m.group(1), "%B %d, %Y"))
+ if not firstdate:
+ self.story.setMetadata('datePublished',
+ self.story.getMetadataRaw('dateUpdated'))
+
+ else:
+ self.story.setMetadata('dateUpdated',
+ self.story.getMetadataRaw('datePublished'))
+
+ # Words: 123456
+ m = re.match(r".*?\| Words: (\d+) \|",metastr)
+ if m:
+ self.story.setMetadata('numWords', m.group(1))
+
+ # Summary: ....
+ m = re.match(r".*?Summary: (.*)$",metastr)
+ if m:
+ self.story.setMetadata('description', m.group(1))
+
+ # completed
+ m = re.match(r".*?Status: Completed.*?",metastr)
+ if m:
+ self.story.setMetadata('status','Completed')
+ else:
+ self.story.setMetadata('status','In-Progress')
+
+ return
+
+ def getChapterText(self, url):
+
+ logging.debug('Getting chapter text from: %s' % url)
+
+ data=self._fetchUrl(url)
+ soup = bs.BeautifulStoneSoup(data,
+ selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+ anchor = soup.find('a',{'name':'fic_c'})
+
+ if None == anchor:
+ raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+ ## find divs with align=left; those are paragraphs in newer stories.
+ divlist = anchor.findAllNext('div',{'align':'left'})
+ if divlist:
+ for div in divlist:
+ div.name='p' # convert to <p>; mediaminer uses divs
+ # with a margin for paragraphs.
+ anchor.append(div) # cheat!
stuff all the content + # divs into anchor just as a + # holder. + del div['style'] + del div['align'] + anchor.name='div' + return utf8FromSoup(anchor) + + else: + logging.debug('Using kludgey text find for older mediaminer story.') + ## Some older mediaminer stories are unparsable with BeautifulSoup. + ## Really nasty formatting. Sooo... Cheat! Parse it ourselves a bit first. + ## Story stuff falls between: + data = "<div id='HERE'>" + data[data.find('<a name="fic_c">'):] +"</div>" + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + for tag in soup.findAll('td',{'class':'ffh'}) + \ + soup.findAll('div',{'class':'acl'}) + \ + soup.findAll('div',{'class':'footer smtxt'}) + \ + soup.findAll('table',{'class':'tbbrdr'}): + tag.extract() # remove tag from soup. + + return utf8FromSoup(soup) + + +def getClass(): + return MediaMinerOrgSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py new file mode 100644 index 00000000..20eafff8 --- /dev/null +++ b/fanficdownloader/adapters/adapter_potionsandsnitchesnet.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class PotionsAndSnitchesNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','pns') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.story.addToList("category","Harry Potter") + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/fanfiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+
+ @staticmethod
+ def getSiteDomain():
+ return 'www.potionsandsnitches.net'
+
+ @classmethod
+ def getAcceptDomains(cls):
+ return ['www.potionsandsnitches.net','potionsandsnitches.net']
+
+ def getSiteExampleURLs(self):
+ return "http://www.potionsandsnitches.net/fanfiction/viewstory.php?sid=1234 http://potionsandsnitches.net/fanfiction/viewstory.php?sid=5678"
+
+ def getSiteURLPattern(self):
+ return re.escape("http://")+r"(www\.)?"+re.escape("potionsandsnitches.net/fanfiction/viewstory.php?sid=")+r"\d+$"
+
+ def needToLoginCheck(self, data):
+ if 'Registered Users Only' in data \
+ or 'There is no such account on our website' in data \
+ or "That password doesn't match the one in our database" in data:
+ return True
+ else:
+ return False
+
+ def extractChapterUrlsAndMetadata(self):
+
+ url = self.url+'&index=1'
+ logging.debug("URL: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+
+ if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+ raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ soup = bs.BeautifulSoup(data)
+
+ ## Title
+ a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ self.story.setMetadata('title',a.string)
+
+ # Find authorid and URL from... author url.
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ self.story.setMetadata('authorId',a['href'].split('=')[1])
+ self.story.setMetadata('authorUrl','http://'+self.host+'/fanfiction/'+a['href'])
+ self.story.setMetadata('author',a.string)
+
+ # Find the chapters:
+ for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+ # just in case there's tags, like <i> in chapter titles.
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfiction/'+chapter['href']))
+
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ ## <meta name='description' content='<p>Description</p> ...' >
+ ## Summary, strangely, is in the content attr of a <meta name='description'> tag
+ ## which is escaped HTML. Unfortunately, we can't use it because they don't
+ ## escape (') chars in the desc, breaking the tag.
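+ ## For example (hypothetical summary), a single apostrophe ends the
+ ## attribute early: <meta name='description' content='<p>Don't panic</p>'>
+ ## parses as content='<p>Don', which is why this stays commented out: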
+ #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next div class='listbox' + svalue = "" + while not defaultGetattr(value,'class') == 'listbox': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + if char == "!Snape and Harry (required)": + self.story.addToList('characters',"Snape") + self.story.addToList('characters',"Harry") + else: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), "%b %d %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), "%b %d %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/fanfiction/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return utf8FromSoup(div) + +def getClass(): + return PotionsAndSnitchesNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_siyecouk.py b/fanficdownloader/adapters/adapter_siyecouk.py new file mode 100644 index 00000000..8bf1aa42 --- /dev/null +++ b/fanficdownloader/adapters/adapter_siyecouk.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +# This function is called by the downloader in all adapter_*.py files +# in this dir to register the adapter class. So it needs to be +# updated to reflect the class below it. That, plus getSiteDomain() +# take care of 'Registering'. +def getClass(): + return SiyeCoUkAdapter # XXX + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class SiyeCoUkAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8",]# 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + # self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + # self.password = "" + # self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/siye/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','siye') # XXX + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") # XXX + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" # XXX + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.siye.co.uk' # XXX + + @classmethod + def getAcceptDomains(cls): + return ['www.siye.co.uk','siye.co.uk'] + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/siye/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?siye\.co\.uk/(siye/)?"+re.escape("viewstory.php?sid=")+r"\d+$" + + # ## Login seems to be reasonably standard across eFiction sites. 
+ # def needToLoginCheck(self, data): + # if 'Registered Users Only' in data \ + # or 'There is no such account on our website' in data \ + # or "That password doesn't match the one in our database" in data: + # return True + # else: + # return False + + # def performLogin(self, url): + # params = {} + + # if self.password: + # params['penname'] = self.username + # params['password'] = self.password + # else: + # params['penname'] = self.getConfig("username") + # params['password'] = self.getConfig("password") + # params['cookiecheck'] = '1' + # params['submit'] = 'Submit' + + # loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + # logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + # params['penname'])) + + # d = self._fetchUrl(loginUrl, params) + + # if "Member Account" not in d : #Member Account + # logging.info("Failed to login to URL %s as %s" % (loginUrl, + # params['penname'])) + # raise exceptions.FailedToLogin(url,params['penname']) + # return False + # else: + # return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # if self.is_adult or self.getConfig("is_adult"): + # # Weirdly, different sites use different warning numbers. + # # If the title search below fails, there's a good chance + # # you need a different number. print data at that point + # # and see what the 'click here to continue' url says. + # addurl = "&ageconsent=ok&warning=4" # XXX + # else: + # addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + # Except it doesn't this time. :-/ + url = self.url #+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # if self.needToLoginCheck(data): + # # need to log in for this one. + # self.performLogin(url) + # data = self._fetchUrl(url) + + # # The actual text that is used to announce you need to be an + # # adult varies from site to site. Again, print data before + # # the title search to troubleshoot. + # if "Age Consent Required" in data: # XXX + # raise exceptions.AdultCheckRequired(self.url) + + # if "Access denied. This story has not been validated by the adminstrators of this site." in data: + # raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/siye/'+a['href']) + self.story.setMetadata('author',a.string) + + # need(or easier) to pull other metadata from the author's list page. 
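+ # (One extra fetch, but the author page lists each story exactly once
+ # with a plain title link, e.g. hypothetical markup:
+ #   <a href="viewstory.php?sid=1234">Story Title</a> )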
+ authsoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + ## Title + titlea = authsoup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',titlea.string) + + # Find the chapters (from soup, not authsoup): + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/siye/'+chapter['href'])) + + if self.chapterUrls: + self.story.setMetadata('numChapters',len(self.chapterUrls)) + else: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + self.story.setMetadata('numChapters',1) + + # The stuff we can get from the chapter list/one-shot page are + # in the first table with 95% width. + metatable = soup.find('table',{'width':'95%'}) + + # Categories + cat_as = metatable.findAll('a', href=re.compile(r'categories.php')) + for cat_a in cat_as: + self.story.addToList('category',stripHTML(cat_a)) + + moremetaparts = stripHTML(metatable).split('\n') + for part in moremetaparts: + part = part.strip() + if part.startswith("Characters:"): + part = part[part.find(':')+1:] + for item in part.split(','): + if item.strip() == "Harry/Ginny": + self.story.addToList('characters',"Harry") + self.story.addToList('characters',"Ginny") + elif item.strip() not in ("None","All"): + self.story.addToList('characters',item) + + if part.startswith("Genres:"): + part = part[part.find(':')+1:] + for item in part.split(','): + if item.strip() != "None": + self.story.addToList('genre',item) + + if part.startswith("Warnings:"): + part = part[part.find(':')+1:] + for item in part.split(','): + if item.strip() != "None": + self.story.addToList('warnings',item) + + if part.startswith("Rating:"): + part = part[part.find(':')+1:] + self.story.setMetadata('rating',part) + + if part.startswith("Summary:"): + part = part[part.find(':')+1:] + self.story.setMetadata('description',part) + + # want to get the next tr of the table. + #print("%s"%titlea.parent.parent.findNextSibling('tr')) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + moremeta = stripHTML(titlea.parent.parent.parent.find('div',{'class':'desc'})) + for part in moremeta.replace(' - ','\n').split('\n'): + #print("part:%s"%part) + try: + (name,value) = part.split(': ') + except: + # not going to worry about fancier processing for the bits + # that don't match. + continue + name=name.strip() + value=value.strip() + if name == 'Published': + self.story.setMetadata('datePublished', makeDate(value, self.dateformat)) + if name == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat)) + if name == 'Completed': + if value == 'Yes': + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + if name == 'Words': + self.story.setMetadata('numWords', value) + + try: + # Find Series name from series URL. + a = titlea.findPrevious('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. 
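+ # Sketch of the loop below (assumed markup): the series page lists its
+ # member stories in order, so the 1-based position of our sid among the
+ # <a href="viewstory.php?sid=NNN"> links becomes the series index.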
+ seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + # soup = bs.BeautifulSoup(self._fetchUrl(url)) + # BeautifulSoup objects to <p> inside <span>, which + # technically isn't allowed. + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + # not the most unique thing in the world, but it appears to be + # the best we can do here. + story = soup.find('span', {'style' : 'font-size: 100%;'}) + + if None == story: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(story) diff --git a/fanficdownloader/adapters/adapter_tenhawkpresentscom.py b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py new file mode 100644 index 00000000..e505c428 --- /dev/null +++ b/fanficdownloader/adapters/adapter_tenhawkpresentscom.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TenhawkPresentsComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','thpc') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.dateformat = "%b %d %Y" + + + @staticmethod + def getSiteDomain(): + return 'fanfiction.tenhawkpresents.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + addurl = "&ageconsent=ok&warning=4" + url = self.url+'&index=1'+addurl + logging.debug("Changing URL: "+url) + self.performLogin(url) + data = self._fetchUrl(url) + + if "This story contains mature content which may include violence, sexual situations, and coarse language" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId'))) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
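+ # e.g. (hypothetical) stripHTML() turns '<a ...>Chapter 2 <i>redux</i></a>'
+ # into u'Chapter 2 redux', so markup can't leak into the chapter list.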
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return TenhawkPresentsComSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py new file mode 100644 index 00000000..4fbd6021 --- /dev/null +++ b/fanficdownloader/adapters/adapter_test1.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import datetime +import time +import logging + +from .. import BeautifulSoup as bs +from .. import exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TestSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','tst1') + self.crazystring = u" crazy tests:[bare amp(&) quote(') amp(&) gt(>) lt(<) ATnT(AT&T) pound(£)]" + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + self.username='' + self.is_adult=False + + @staticmethod + def getSiteDomain(): + return 'test1.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"?sid=1234" + + def getSiteURLPattern(self): + return BaseSiteAdapter.getSiteURLPattern(self)+r'/?\?sid=\d+$' + + def extractChapterUrlsAndMetadata(self): + + if self.story.getMetadata('storyId') == '665' and not (self.is_adult or self.getConfig("is_adult")): + logging.warn("self.is_adult:%s"%self.is_adult) + raise exceptions.AdultCheckRequired(self.url) + + if self.story.getMetadata('storyId') == '666': + raise exceptions.StoryDoesNotExist(self.url) + + if self.story.getMetadata('storyId').startswith('670'): + time.sleep(1.0) + + if self.story.getMetadata('storyId').startswith('671'): + time.sleep(1.0) + + if self.getConfig("username"): + self.username = self.getConfig("username") + + if self.story.getMetadata('storyId') == '668' and self.username != "Me" : + raise exceptions.FailedToLogin(self.url,self.username) + + if self.story.getMetadata('storyId') == '664': + self.story.setMetadata(u'title',"Test Story Title "+self.story.getMetadata('storyId')+self.crazystring) + self.story.setMetadata('author','Test Author aa bare amp(&) quote(') amp(&)') + else: + self.story.setMetadata(u'title',"Test Story Title "+self.story.getMetadata('storyId')) + self.story.setMetadata('author','Test Author aa') + self.story.setMetadata('storyUrl',self.url) + self.story.setMetadata('description',u'Description '+self.crazystring+u''' Done + +Some more longer description. "I suck at summaries!" "Better than it sounds!" "My first fic" +''') + self.story.setMetadata('datePublished',makeDate("1975-03-15","%Y-%m-%d")) + self.story.setMetadata('dateCreated',datetime.datetime.now()) + if self.story.getMetadata('storyId') == '669': + self.story.setMetadata('dateUpdated',datetime.datetime.now()) + else: + self.story.setMetadata('dateUpdated',makeDate("1975-04-15","%Y-%m-%d")) + self.story.setMetadata('numWords','123456') + + idnum = int(self.story.getMetadata('storyId')) + if idnum % 2 == 1: + self.story.setMetadata('status','In-Progress') + else: + self.story.setMetadata('status','Completed') + + langs = { + 0:"English", + 1:"Russian", + 2:"French", + 3:"German", + } + if idnum < 10: + self.story.setMetadata('language',langs[idnum%len(langs)]) + # greater than 10, no language. 
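+ # e.g. idnum 3 -> langs[3 % 4] == "German"; idnum 10 and up leaves
+ # 'language' unset.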
+ + self.setSeries('The Great Test',idnum) + + self.story.setMetadata('rating','Tweenie') + + self.story.setMetadata('authorId','98765') + self.story.setMetadata('authorUrl','http://author/url') + + self.story.addToList('warnings','Swearing') + self.story.addToList('warnings','Violence') + + self.story.addToList('category','Harry Potter') + self.story.addToList('category','Furbie') + self.story.addToList('category','Crossover') + self.story.addToList('category',u'Puella Magi Madoka Magica/魔法少女まどか★マギカ') + self.story.addToList('category',u'Magical Girl Lyrical Nanoha') + self.story.addToList('genre','Fantasy') + self.story.addToList('genre','SF') + self.story.addToList('genre','Noir') + + self.chapterUrls = [(u'Prologue '+self.crazystring,self.url+"&chapter=1"), + ('Chapter 1, Xenos on Cinnabar',self.url+"&chapter=2"), + ('Chapter 2, Sinmay on Kintikin',self.url+"&chapter=3"), + ('Chapter 3, Over Cinnabar',self.url+"&chapter=4"), + ('Chapter 4',self.url+"&chapter=5"), + ('Chapter 5',self.url+"&chapter=6"), + ('Chapter 6',self.url+"&chapter=6"), + ('Chapter 7',self.url+"&chapter=6"), + # ('Chapter 8',self.url+"&chapter=6"), + # ('Chapter 9',self.url+"&chapter=6"), + # ('Chapter 0',self.url+"&chapter=6"), + # ('Chapter a',self.url+"&chapter=6"), + # ('Chapter b',self.url+"&chapter=6"), + # ('Chapter c',self.url+"&chapter=6"), + # ('Chapter d',self.url+"&chapter=6"), + # ('Chapter e',self.url+"&chapter=6"), + # ('Chapter f',self.url+"&chapter=6"), + # ('Chapter g',self.url+"&chapter=6"), + # ('Chapter h',self.url+"&chapter=6"), + # ('Chapter i',self.url+"&chapter=6"), + # ('Chapter j',self.url+"&chapter=6"), + # ('Chapter k',self.url+"&chapter=6"), + # ('Chapter l',self.url+"&chapter=6"), + # ('Chapter m',self.url+"&chapter=6"), + # ('Chapter n',self.url+"&chapter=6"), + ] + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) + if self.story.getMetadata('storyId') == '667': + raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) + + if self.story.getMetadata('storyId').startswith('670') or \ + self.story.getMetadata('storyId').startswith('672'): + time.sleep(1.0) + + if "chapter=1" in url : + text=u''' +<div> +<h3>Prologue</h3> +<p>This is a fake adapter for testing purposes. Different storyId's will give different errors:</p> +<p>http://test1.com?sid=664 - Crazy string title</p> +<p>http://test1.com?sid=665 - raises AdultCheckRequired</p> +<p>http://test1.com?sid=666 - raises StoryDoesNotExist</p> +<p>http://test1.com?sid=667 - raises FailedToDownload on chapter 1</p> +<p>http://test1.com?sid=668 - raises FailedToLogin unless username='Me'</p> +<p>http://test1.com?sid=669 - Succeeds with Updated Date=now</p> +<p>http://test1.com?sid=670 - Succeeds, but sleeps 2sec on each chapter</p> +<p>http://test1.com?sid=671 - Succeeds, but sleeps 2sec metadata only</p> +<p>http://test1.com?sid=672 - Succeeds, quick meta, sleeps 2sec chapters only</p> +<p>And other storyId will succeed with the same output.</p> +</div> +''' + else: + text=u''' +<div> +<h3>Chapter</h3> +<p><center>Centered text</center></p> +<p>Lorem '''+self.crazystring+''' <i>italics</i>, <b>bold</b>, <u>underline</u> consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +br breaks<br><br> +br breaks<br><br> +<hr> +horizontal rules +<hr> +<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p> +</div> +''' + soup = bs.BeautifulStoneSoup(text,selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + return utf8FromSoup(soup) + +def getClass(): + return TestSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py new file mode 100644 index 00000000..6ce0c425 --- /dev/null +++ b/fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twcs') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
+ self._setURL('http://' + self.getSiteDomain() + '/library/viewstory.php?sid='+self.story.getMetadata('storyId')) + self.dateformat = "%B %d, %Y" + + + @staticmethod + def getSiteDomain(): + return 'www.thewriterscoffeeshop.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/library/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/library/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/library/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + data = data[data.index("<body"):] + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/library/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/library/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/library/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + data = data[data.index("<body"):] + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url)
+
+        return utf8FromSoup(span)
+
+def getClass():
+    return TheWritersCoffeeShopComSiteAdapter
+
diff --git a/fanficdownloader/adapters/adapter_tthfanficorg.py b/fanficdownloader/adapters/adapter_tthfanficorg.py
new file mode 100644
index 00000000..e3a3ae6a
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_tthfanficorg.py
@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate
+
+class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+        self.story.setMetadata('siteabbrev','tth')
+        self.dateformat = "%d %b %y"
+        self.is_adult=False
+        self.username = None
+        self.password = None
+        # get storyId from url--url validation guarantees query correct
+        m = re.match(self.getSiteURLPattern(),url)
+        if m:
+            self.story.setMetadata('storyId',m.group('id'))
+            logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+            # normalized story URL.
+            self._setURL("http://"+self.getSiteDomain()\
+                             +"/Story-"+self.story.getMetadata('storyId'))
+        else:
+            raise exceptions.InvalidStoryURL(url,
+                                             self.getSiteDomain(),
+                                             self.getSiteExampleURLs())
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.tthfanfic.org'
+
+    def getSiteExampleURLs(self):
+        return "http://www.tthfanfic.org/Story-5583 http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm http://www.tthfanfic.org/T-526321777890480578489880055880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm"
+
+    # http://www.tthfanfic.org/T-526321777848988007890480555880/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm
+    # http://www.tthfanfic.org/Story-5583
+    # http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm
+    # http://www.tthfanfic.org/story.php?no=26093
+    def getSiteURLPattern(self):
+        return r"http://www.tthfanfic.org(/(T-\d+/)?Story-|/story.php\?no=)(?P<id>\d+)(-\d+)?(/.*)?$"
+
+    # tth won't send you future updates if you aren't 'caught up'
+    # on the story.  Login isn't required for FR21, but logging in will
+    # mark stories you've downloaded as 'read' on tth.
+    def performLogin(self):
+        params = {}
+
+        if self.password:
+            params['urealname'] = self.username
+            params['password'] = self.password
+        else:
+            params['urealname'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+        params['loginsubmit'] = 'Login'
+
+        if not params['password']:
+            return
+
+        loginUrl = 'http://' + self.getSiteDomain() + '/login.php'
+        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+                                                              params['urealname']))
+
+        ## need to pull the empty login page first to get the ctkn value
+        ## and the password field's name, which the site generates
+        ## dynamically:
+# <form method='post' action='/login.php' accept-charset="utf-8">
+# <input type='hidden' name='ctkn' value='4bdf761f5bea06bf4477072afcbd0f8d721d1a4f989c09945a9e87afb7a66de1'/>
+# <input type='text' id='urealname' name='urealname' value=''/>
+# <input type='password' id='password' name='6bb3fcd148d148629223690bf19733b8'/>
+# <input type='submit' value='Login' name='loginsubmit'/>
+        soup = bs.BeautifulSoup(self._fetchUrl(loginUrl))
+        params['ctkn']=soup.find('input', {'name':'ctkn'})['value']
+        params[soup.find('input', {'id':'password'})['name']] = params['password']
+
+        d = self._fetchUrl(loginUrl, params)
+
+        if "Stories Published" not in d : #Member Account
+            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+                                                              params['urealname']))
+            raise exceptions.FailedToLogin(loginUrl,params['urealname'])
+            return False
+        else:
+            return True
+
+    def extractChapterUrlsAndMetadata(self):
+        # fetch the chapter.  From that we will get almost all the
+        # metadata and chapter list
+
+        url=self.url
+        logging.debug("URL: "+url)
+
+        # tth won't send you future updates if you aren't 'caught up'
+        # on the story.  Login isn't required for FR21, but logging in will
+        # mark stories you've downloaded as 'read' on tth.
+        self.performLogin()
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        try:
+            data = self._fetchUrl(url)
+            soup = bs.BeautifulSoup(data)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(url)
+            else:
+                raise e
+
+        if "<h2>Story Not Found</h2>" in data:
+            raise exceptions.StoryDoesNotExist(url)
+
+        if "NOTE: This story is rated FR21 which is above your chosen filter level." in data:
+            if self.is_adult or self.getConfig("is_adult"):
+                form = soup.find('form', {'id':'sitemaxratingform'})
+                params={'ctkn':form.find('input', {'name':'ctkn'})['value'],
+                        'sitemaxrating':'5'}
+                logging.info("Attempting to get rating cookie for %s" % url)
+                data = self._postUrl("http://"+self.getSiteDomain()+'/setmaxrating.php',params)
+                # refetch story page.
+                data = self._fetchUrl(url)
+                soup = bs.BeautifulSoup(data)
+            else:
+                raise exceptions.AdultCheckRequired(self.url)
+
+        # http://www.tthfanfic.org/AuthorStories-3449/Greywizard.htm
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"^/AuthorStories-\d+"))
+        self.story.setMetadata('authorId',a['href'].split('/')[1].split('-')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
+        self.story.setMetadata('author',stripHTML(a))
+
+        try:
+            # going to pull part of the meta data from author list page.
+            logging.debug("**AUTHOR** URL: "+self.story.getMetadata('authorUrl'))
+            authordata = self._fetchUrl(self.story.getMetadata('authorUrl'))
+            authorsoup = bs.BeautifulSoup(authordata)
+            # author can have several pages, scan until we find it.
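+            # (keep following the 'arrowf' next-page arrow link until the
+            # page listing this story's /Story-<id> link turns up)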
+ while( not authorsoup.find('a', href=re.compile(r"^/Story-"+self.story.getMetadata('storyId'))) ): + nextpage = 'http://'+self.host+authorsoup.find('a', {'class':'arrowf'})['href'] + logging.debug("**AUTHOR** nextpage URL: "+nextpage) + authordata = self._fetchUrl(nextpage) + authorsoup = bs.BeautifulSoup(authordata) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(url) + else: + raise e + + storydiv = authorsoup.find('div', {'id':'st'+self.story.getMetadata('storyId'), 'class':re.compile(r"storylistitem")}) + self.story.setMetadata('description',stripHTML(storydiv.find('div',{'class':'storydesc'}))) + self.story.setMetadata('title',stripHTML(storydiv.find('a',{'class':'storylink'}))) + + verticaltable = soup.find('table', {'class':'verticaltable'}) + + BtVS = True + for cat in verticaltable.findAll('a', href=re.compile(r"^/Category-")): + if cat.string not in ['General', 'Non-BtVS/AtS Stories', 'BtVS/AtS Non-Crossover', 'Non-BtVS Crossovers']: + self.story.addToList('category',cat.string) + else: + if 'Non-BtVS' in cat.string: + BtVS = False + if BtVS: + self.story.addToList('category','Buffy: The Vampire Slayer') + + verticaltabletds = verticaltable.findAll('td') + self.story.setMetadata('rating', verticaltabletds[2].string) + self.story.setMetadata('numWords', verticaltabletds[4].string) + + # Complete--if completed. + if 'Yes' in verticaltabletds[10].string: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + self.story.setMetadata('datePublished',makeDate(stripHTML(verticaltabletds[8].string), self.dateformat)) + self.story.setMetadata('dateUpdated',makeDate(stripHTML(verticaltabletds[9].string), self.dateformat)) + + for icon in storydiv.find('span',{'class':'storyicons'}).findAll('img'): + if( icon['title'] not in ['Non-Crossover'] ) : + self.story.addToList('genre',icon['title']) + + # Find the chapter selector + select = soup.find('select', { 'name' : 'chapnav' } ) + + if select is None: + # no selector found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = "http://"+self.host+o['value'] + # just in case there's tags, like <i> in chapter titles. + self.chapterUrls.append((stripHTML(o),url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + pseries = soup.find('p', {'style':'margin-top:0px'}) + m = re.match('This story is No\. (?P<num>\d+) in the series "(?P<series>.+)"\.', + pseries.text) + if m: + self.setSeries(m.group('series'),m.group('num')) + + return + + + def getChapterText(self, url): + logging.debug('Getting chapter text from: %s' % url) + soup = bs.BeautifulSoup(self._fetchUrl(url)) + + div = soup.find('div', {'id' : 'storyinnerbody'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + # strip out included chapter title, if present, to avoid doubling up. 
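+        # (tth repeats the chapter title as an <h3> inside the
+        # storyinnerbody div)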
+ try: + div.find('h3').extract() + except: + pass + return utf8FromSoup(div) + +def getClass(): + return TwistingTheHellmouthSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twilightednet.py b/fanficdownloader/adapters/adapter_twilightednet.py new file mode 100644 index 00000000..415f8040 --- /dev/null +++ b/fanficdownloader/adapters/adapter_twilightednet.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TwilightedNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','tw') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.story.addToList("category","Twilight") + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
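+        # e.g. http://www.twilighted.net/viewstory.php?sid=1234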
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + + @staticmethod + def getSiteDomain(): + return 'www.twilighted.net' + + @classmethod + def getAcceptDomains(cls): + return ['www.twilighted.net','twilighted.net'] + + def getSiteExampleURLs(self): + return "http://www.twilighted.net/viewstory.php?sid=1234 http://twilighted.net/viewstory.php?sid=5678" + + def getSiteURLPattern(self): + return re.escape("http://")+r"(www\.)?"+re.escape("twilighted.net/viewstory.php?sid=")+r"\d+$" + + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + def extractChapterUrlsAndMetadata(self): + + url = self.url+'&index=1' + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + # twilighted isn't writing <body> ??? wtf? + data = "<html><body>"+data[data.index("</head>"):] + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like <i> in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + ## twilighted.net doesn't use genre. + # if 'Genre' in label: + # genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class')) + # genrestext = [genre.string for genre in genres] + # self.genre = ', '.join(genrestext) + # for genre in genrestext: + # self.story.addToList('genre',genre.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + # twilighted isn't writing <body> ??? wtf? + data = "<html><body>"+data[data.index("</head>"):] + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return utf8FromSoup(span) + +def getClass(): + return TwilightedNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_twiwritenet.py b/fanficdownloader/adapters/adapter_twiwritenet.py new file mode 100644 index 00000000..f243c668 --- /dev/null +++ b/fanficdownloader/adapters/adapter_twiwritenet.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class TwiwriteNetSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','twrt') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.story.addToList("category","Twilight") + self.username = "NoneGiven" # if left empty, twiwrite.net doesn't return any message at all. + self.password = "" + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. 
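+        # e.g. http://www.twiwrite.net/viewstory.php?sid=1234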
+        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+
+    @staticmethod
+    def getSiteDomain():
+        return 'www.twiwrite.net'
+
+    @classmethod
+    def getAcceptDomains(cls):
+        return ['www.twiwrite.net','twiwrite.net']
+
+    def getSiteExampleURLs(self):
+        return "http://www.twiwrite.net/viewstory.php?sid=1234 http://twiwrite.net/viewstory.php?sid=5678"
+
+    def getSiteURLPattern(self):
+        return re.escape("http://")+r"(www\.)?"+re.escape("twiwrite.net/viewstory.php?sid=")+r"\d+$"
+
+    def needToLoginCheck(self, data):
+        if 'Registered Users Only' in data \
+                or 'There is no such account on our website' in data \
+                or "That password doesn't match the one in our database" in data:
+            return True
+        else:
+            return False
+
+    def performLogin(self, url):
+        params = {}
+
+        if self.password:
+            params['penname'] = self.username
+            params['password'] = self.password
+        else:
+            params['penname'] = self.getConfig("username")
+            params['password'] = self.getConfig("password")
+        params['cookiecheck'] = '1'
+        params['submit'] = 'Submit'
+
+        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
+        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+                                                              params['penname']))
+
+        d = self._fetchUrl(loginUrl, params)
+
+        if "Member Account" not in d : #Member Account
+            logging.info("Failed to login to URL %s as %s" % (loginUrl,
+                                                              params['penname']))
+            raise exceptions.FailedToLogin(url,params['penname'])
+            return False
+        else:
+            return True
+
+    def extractChapterUrlsAndMetadata(self):
+
+        url = self.url+'&index=1'
+        logging.debug("URL: "+url)
+
+        try:
+            data = self._fetchUrl(url)
+        except urllib2.HTTPError, e:
+            if e.code == 404:
+                raise exceptions.StoryDoesNotExist(self.url)
+            else:
+                raise e
+
+        if self.needToLoginCheck(data):
+            # need to log in for this one.
+            self.performLogin(url)
+            data = self._fetchUrl(url)
+
+        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+            raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+        # problems with some stories, but only in calibre.  I suspect
+        # issues with different SGML parsers in python.  This is a
+        # nasty hack, but it works.
+        data = data[data.index("<body"):]
+
+        # use BeautifulSoup HTML parser to make everything easier to find.
+        soup = bs.BeautifulSoup(data)
+
+        ## Title
+        a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+        self.story.setMetadata('title',a.string)
+
+        # Find authorid and URL from... author url.
+        a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+        self.story.setMetadata('authorId',a['href'].split('=')[1])
+        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+        self.story.setMetadata('author',a.string)
+
+        # Find the chapters:
+        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+            # just in case there's tags, like <i> in chapter titles.
+            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
+
+        self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+        ## <meta name='description' content='<p>Description</p> ...' >
+        ## Summary, strangely, is in the content attr of a <meta name='description'> tag
+        ## which is escaped HTML.  Unfortunately, we can't use it because they don't
+        ## escape (') chars in the desc, breaking the tag.
+ #meta_desc = soup.find('meta',{'name':'description'}) + #metasoup = bs.BeautifulStoneSoup(meta_desc['content']) + #self.story.setMetadata('description',stripHTML(metasoup)) + + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # <span class="label">Rated:</span> NC-17<br /> etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=3')) + genrestext = [genre.string for genre in genres] + self.genre = ', '.join(genrestext) + for genre in genrestext: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=8')) + warningstext = [warning.string for warning in warnings] + self.warning = ', '.join(warningstext) + for warning in warningstext: + self.story.addToList('warning',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(value.strip(), "%B %d, %Y")) + + if 'Updated' in label: + # there's a stray [ at the end. + value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(value.strip(), "%B %d, %Y")) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + # problems with some stories, but only in calibre. I suspect + # issues with different SGML parsers in python. This is a + # nasty hack, but it works. + data = data[data.index("<body"):] + + soup = bs.BeautifulStoneSoup(data, + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + span = soup.find('div', {'id' : 'story'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return utf8FromSoup(span) + +def getClass(): + return TwiwriteNetSiteAdapter + diff --git a/fanficdownloader/adapters/adapter_whoficcom.py b/fanficdownloader/adapters/adapter_whoficcom.py new file mode 100644 index 00000000..756519ad --- /dev/null +++ b/fanficdownloader/adapters/adapter_whoficcom.py @@ -0,0 +1,231 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, utf8FromSoup, makeDate + +class WhoficComSiteAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + self.story.setMetadata('siteabbrev','whof') + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + + @staticmethod + def getSiteDomain(): + return 'www.whofic.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+"\d+$" + + def extractChapterUrlsAndMetadata(self): + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + + # fetch the first chapter. From that we will: + # - determine title, authorname, authorid + # - get chapter list, if not one-shot. + + url = self.url+'&chapter=1' + logging.debug("URL: "+url) + + # use BeautifulSoup HTML parser to make everything easier to find. + try: + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # pull title(title) and author from the HTML title. + title = soup.find('title').string + logging.debug('Title: %s' % title) + title = title.split('::')[1].strip() + self.story.setMetadata('title',title.split(' by ')[0].strip()) + self.story.setMetadata('author',title.split(' by ')[1].strip()) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + + # Find the chapter selector + select = soup.find('select', { 'name' : 'chapter' } ) + + if select is None: + # no selector found, so it's a one-chapter story. + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = self.url + "&chapter=%s" % o['value'] + # just in case there's tags, like <i> in chapter titles. 
+ title = "%s" % o + title = re.sub(r'<[^>]+>','',title) + self.chapterUrls.append((title,url)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + ## Whofic.com puts none of the other meta data in the chapters + ## or even the story chapter index page. Need to scrape the + ## author page to find it. + + # <table width="100%" bordercolor="#333399" border="0" cellspacing="0" cellpadding="2"><tr><td> + # <b><a href="viewstory.php?sid=38220">Accompaniment 2</a></b> by <a href="viewuser.php?uid=12412">clandestinemiscreant</a> [<a href="reviews.php?sid=38220">Reviews</a> - <a href="reviews.php?sid=38220">0</a>] <br> + # This is a series of short stories written as an accompaniment to Season 2, Season 28 for us oldies, and each is unrelated except for that one factor. Each story is canon, in that it does not change established events at time of airing, based on things mentioned and/or implied and missing or deleted scenes that were not seen in the final aired episodes.<br> + # <font size="-1"><b><a href="categories.php?catid=15">Tenth Doctor</a></b> - All Ages - None - Humor, Hurt/Comfort, Romance<br> + # <i>Characters:</i> Rose Tyler<br> + # <i>Series:</i> None<br> + # <i>Published:</i> 2010.08.15 - <i>Updated:</i> 2010.08.16 - <i>Chapters:</i> 4 - <i>Completed:</i> Yes - <i>Word Count:</i> 4890 </font> + # </td></tr></table> + + logging.debug("Author URL: "+self.story.getMetadata('authorUrl')) + soup = bs.BeautifulStoneSoup(self._fetchUrl(self.story.getMetadata('authorUrl')), + selfClosingTags=('br')) # normalize <br> tags to <br /> + # find this story in the list, parse it's metadata based on + # lots of assumptions about the html, since there's little + # tagging. + # Found a story once that had the story URL in the desc for a + # series on the same author's page. Now using the reviews + # link instead to find the appropriate metadata. + a = soup.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId'))) + metadata = a.findParent('td') + metadatachunks = utf8FromSoup(metadata).split('<br />') + # process metadata for this story. + self.story.setMetadata('description', metadatachunks[1]) + + # First line of the stuff with ' - ' separators + moremeta = metadatachunks[2] + moremeta = re.sub(r'<[^>]+>','',moremeta) # strip tags. + + moremetaparts = moremeta.split(' - ') + + # first part is category--whofic.com has categories + # Doctor One-11, Torchwood, etc. We're going to + # prepend any with 'Doctor' or 'Era' (Multi-Era, Other + # Era) as 'Doctor Who'. + # + # Also push each in as 'extra tags'. + category = moremetaparts[0] + if 'Doctor' in category or 'Era' in category : + self.story.addToList('category','Doctor Who') + + for cat in category.split(', '): + self.story.addToList('category',cat) + + # next in that line is age rating. + self.story.setMetadata('rating',moremetaparts[1]) + + # after that is a possible list fo specific warnings, + # Explicit Violence, Swearing, etc + if "None" not in moremetaparts[2]: + for warn in moremetaparts[2].split(', '): + self.story.addToList('warnings',warn) + + # then genre. It's another comma list. All together + # in genre, plus each in extra tags. + genre=moremetaparts[3] + for g in genre.split(r', '): + self.story.addToList('genre',g) + + # line 3 is characters. 
+ chars = metadatachunks[3] + charsearch="<i>Characters:</i>" + if charsearch in chars: + chars = chars[metadatachunks[3].index(charsearch)+len(charsearch):] + for c in chars.split(','): + if c.strip() != u'None': + self.story.addToList('characters',c) + + # the next line is stuff with ' - ' separators *and* names--with tags. + moremeta = metadatachunks[5] + moremeta = re.sub(r'<[^>]+>','',moremeta) # strip tags. + + moremetaparts = moremeta.split(' - ') + + for part in moremetaparts: + (name,value) = part.split(': ') + name=name.strip() + value=value.strip() + if name == 'Published': + self.story.setMetadata('datePublished', makeDate(value, '%Y.%m.%d')) + if name == 'Updated': + self.story.setMetadata('dateUpdated', makeDate(value, '%Y.%m.%d')) + if name == 'Completed': + if value == 'Yes': + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + if name == 'Word Count': + self.story.setMetadata('numWords', value) + + try: + # Find Series name from series URL. + a = metadata.find('a', href=re.compile(r"series.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + + # hardly a great identifier, I know, but whofic really doesn't + # give us anything better to work with. + span = soup.find('span', {'style' : 'font-size: 100%;'}) + + if None == span: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return utf8FromSoup(span) + +def getClass(): + return WhoficComSiteAdapter + diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py new file mode 100644 index 00000000..a041975a --- /dev/null +++ b/fanficdownloader/adapters/base_adapter.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import datetime +import time +import logging +import urllib +import urllib2 as u2 +import urlparse as up + +try: + from google.appengine.api import apiproxy_stub_map + def urlfetch_timeout_hook(service, call, request, response): + if call != 'Fetch': + return + # Make the default deadline 10 seconds instead of 5. 
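+        # (urlfetch's stock 5 second deadline can be exceeded by
+        # slower fanfic sites)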
+ if not request.has_deadline(): + request.set_deadline(10.0) + + apiproxy_stub_map.apiproxy.GetPreCallHooks().Append( + 'urlfetch_timeout_hook', urlfetch_timeout_hook, 'urlfetch') + logging.info("Hook to make default deadline 10.0 installed.") +except: + pass + #logging.info("Hook to make default deadline 10.0 NOT installed--not using appengine") + +from ..story import Story +from ..gziphttp import GZipProcessor +from ..configurable import Configurable +from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML +from ..exceptions import InvalidStoryURL + +try: + from .. import chardet as chardet +except ImportError: + chardet = None + +class BaseSiteAdapter(Configurable): + + @classmethod + def matchesSite(cls,site): + return site in cls.getAcceptDomains() + + @classmethod + def getAcceptDomains(cls): + return [cls.getSiteDomain()] + + def validateURL(self): + return re.match(self.getSiteURLPattern(), self.url) + + def __init__(self, config, url): + self.config = config + Configurable.__init__(self, config) + self.addConfigSection(self.getSiteDomain()) + self.addConfigSection("overrides") + + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor()) + self.storyDone = False + self.metadataDone = False + self.story = Story() + self.story.setMetadata('site',self.getSiteDomain()) + self.story.setMetadata('dateCreated',datetime.datetime.now()) + self.chapterUrls = [] # tuples of (chapter title,chapter url) + self.chapterFirst = None + self.chapterLast = None + ## order of preference for decoding. + self.decode = ["utf8", + "Windows-1252"] # 1252 is a superset of + # iso-8859-1. Most sites that + # claim to be iso-8859-1 (and + # some that claim to be utf8) + # are really windows-1252. + self._setURL(url) + if not self.validateURL(): + raise InvalidStoryURL(url, + self.getSiteDomain(), + self.getSiteExampleURLs()) + + def _setURL(self,url): + self.url = url + self.parsedUrl = up.urlparse(url) + self.host = self.parsedUrl.netloc + self.path = self.parsedUrl.path + self.story.setMetadata('storyUrl',self.url) + +## website encoding(s)--in theory, each website reports the character +## encoding they use for each page. In practice, some sites report it +## incorrectly. Each adapter has a default list, usually "utf8, +## Windows-1252" or "Windows-1252, utf8". The special value 'auto' +## will call chardet and use the encoding it reports if it has +90% +## confidence. 'auto' is not reliable. + def _decode(self,data): + if self.getConfig('website_encodings'): + decode = self.getConfigList('website_encodings') + else: + decode = self.decode + + for code in decode: + try: + #print code + if code == "auto": + if not chardet: + logging.info("chardet not available, skipping 'auto' encoding") + continue + detected = chardet.detect(data) + #print detected + if detected['confidence'] > 0.9: + code=detected['encoding'] + else: + continue + return data.decode(code) + except: + logging.debug("code failed:"+code) + pass + logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode) + return "".join([x for x in data if ord(x) < 128]) + + # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s + def _postUrl(self, url, parameters={}, headers={}): + if self.getConfig('slow_down_sleep_time'): + time.sleep(float(self.getConfig('slow_down_sleep_time'))) + + ## u2.Request assumes POST when data!=None. 
Also assumes data
+        ## is application/x-www-form-urlencoded.
+        if 'Content-type' not in headers:
+            headers['Content-type']='application/x-www-form-urlencoded'
+        if 'Accept' not in headers:
+            headers['Accept']="text/html,*/*"
+        req = u2.Request(url,
+                         data=urllib.urlencode(parameters),
+                         headers=headers)
+        return self._decode(self.opener.open(req).read())
+
+    # parameters is a dict()
+    def _fetchUrl(self, url, parameters=None):
+        if self.getConfig('slow_down_sleep_time'):
+            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+
+        excpt=None
+        for sleeptime in [0, 0.5, 4, 9]:
+            time.sleep(sleeptime)
+            try:
+                if parameters:
+                    return self._decode(self.opener.open(url,urllib.urlencode(parameters)).read())
+                else:
+                    return self._decode(self.opener.open(url).read())
+            except Exception, e:
+                excpt=e
+                logging.warn("Caught an exception reading URL: %s  Exception %s."%(unicode(url),unicode(e)))
+
+        logging.error("Giving up on %s" %url)
+        logging.exception(excpt)
+        raise(excpt)
+
+    # Limit chapters to download.  Input starts at 1, list starts at 0
+    def setChaptersRange(self,first=None,last=None):
+        if first:
+            self.chapterFirst=int(first)-1
+        if last:
+            self.chapterLast=int(last)-1
+
+    # Does the download the first time it's called.
+    def getStory(self):
+        if not self.storyDone:
+            self.getStoryMetadataOnly()
+            for index, (title,url) in enumerate(self.chapterUrls):
+                if (self.chapterFirst!=None and index < self.chapterFirst) or \
+                        (self.chapterLast!=None and index > self.chapterLast):
+                    self.story.addChapter(removeEntities(title),
+                                          None)
+                else:
+                    self.story.addChapter(removeEntities(title),
+                                          removeEntities(self.getChapterText(url)))
+            self.storyDone = True
+        return self.story
+
+    def getStoryMetadataOnly(self):
+        if not self.metadataDone:
+            self.extractChapterUrlsAndMetadata()
+            self.metadataDone = True
+        return self.story
+
+    ###############################
+
+    @staticmethod
+    def getSiteDomain():
+        "Needs to be overridden in each adapter class."
+        return 'no such domain'
+
+    ## URL pattern validation is done *after* picking an adapter based
+    ## on domain instead of *as* the adapter selector so we can offer
+    ## the user example(s) for that particular site.
+    ## Override validateURL(self) instead if you need more control.
+    def getSiteURLPattern(self):
+        "Used to validate URL.  Should be overridden in each adapter class."
+        return '^http://'+re.escape(self.getSiteDomain())
+
+    def getSiteExampleURLs(self):
+        """
+        Needs to be overridden in each adapter class.  It's the adapter
+        writer's responsibility to make sure the example(s) pass
+        URL validation.
+        """
+        return 'no such example'
+
+    def extractChapterUrlsAndMetadata(self):
+        "Needs to be overridden in each adapter class.  Populates self.story metadata and self.chapterUrls"
+        pass
+
+    def getChapterText(self, url):
+        "Needs to be overridden in each adapter class."
+        pass
+
+    # Just for series, in case we choose to change how it's stored or represented later.
+    def setSeries(self,name,num):
+        if self.getConfig('collect_series'):
+            self.story.setMetadata('series','%s [%s]'%(name, num))
+
+fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
+           "June":"06","July":"07", "August":"08", "September":"09", "October":"10",
+           "November":"11", "December":"12" }
+
+def makeDate(string,format):
+    # Surprise!  Abstracting this turned out to be more useful than
+    # just saving bytes.
+
+    # fudge english month names for people whose locale is set to
+    # non-english.  All our current sites date in english, even if
+    # there's non-english content.
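+    # For example (hypothetical inputs): makeDate("August 16, 2010",
+    # "%B %d, %Y") becomes strptime("08 16, 2010", "%m %d, %Y"), and
+    # makeDate("16 Aug 10", "%d %b %y") matches on the three-letter
+    # abbreviation "Aug" the same way.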
+    do_abbrev = "%b" in format
+
+    if "%B" in format or do_abbrev:
+        format = format.replace("%B","%m").replace("%b","%m")
+        for (name,num) in fullmon.items():
+            if do_abbrev:
+                name = name[:3] # first three for abbrev
+            if name in string:
+                string = string.replace(name,num)
+                break
+
+    return datetime.datetime.strptime(string,format)
+
+acceptable_attributes = ['href','name']
+
+# this gives us a unicode object, not just a string containing bytes.
+# (I gave soup a unicode string, you'd think it could give it back...)
+def utf8FromSoup(soup):
+    for t in soup.findAll(recursive=True):
+        for attr in t._getAttrMap().keys():
+            if attr not in acceptable_attributes:
+                del t[attr] ## strip all tag attributes except href and name
+        # these are not acceptable strict XHTML.  But we do already have
+        # CSS classes of the same names defined in constants.py
+        if t.name in ('u',):
+            t['class']=t.name
+            t.name='span'
+        if t.name in ('center',):
+            t['class']=t.name
+            t.name='div'
+        # removes paired, but empty tags.
+        if t.string != None and len(t.string.strip()) == 0 :
+            t.extract()
+    return soup.__str__('utf8').decode('utf-8')
diff --git a/fanficdownloader/chardet/__init__.py b/fanficdownloader/chardet/__init__.py
new file mode 100644
index 00000000..953b3994
--- /dev/null
+++ b/fanficdownloader/chardet/__init__.py
@@ -0,0 +1,26 @@
+######################## BEGIN LICENSE BLOCK ########################
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+__version__ = "2.0.1"
+
+def detect(aBuf):
+    import universaldetector
+    u = universaldetector.UniversalDetector()
+    u.reset()
+    u.feed(aBuf)
+    u.close()
+    return u.result
diff --git a/fanficdownloader/chardet/big5freq.py b/fanficdownloader/chardet/big5freq.py
new file mode 100644
index 00000000..c1b0f3ce
--- /dev/null
+++ b/fanficdownloader/chardet/big5freq.py
@@ -0,0 +1,923 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# Big5 frequency table +# by Taiwan's Mandarin Promotion Council +# <http://www.edu.tw:81/mandr/> +# +# 128 --> 0.42261 +# 256 --> 0.57851 +# 512 --> 0.74851 +# 1024 --> 0.89384 +# 2048 --> 0.97583 +# +# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98 +# Random Distribution Ration = 512/(5401-512)=0.105 +# +# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR + +BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 + +#Char to FreqOrder table +BIG5_TABLE_SIZE = 5376 + +Big5CharToFreqOrder = ( \ + 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 +3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 +1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 + 63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64 +3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80 +4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96 +5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112 + 630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128 + 179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144 + 995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160 +2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176 +1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192 +3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208 + 706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224 +1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240 +3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256 +2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272 + 437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288 +3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304 +1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320 +5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336 + 266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352 +5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368 +1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384 + 32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400 + 188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416 +3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432 +3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448 + 324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464 +2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480 +2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496 + 
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512 + 287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528 +3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544 +1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560 +1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576 +1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592 +2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608 + 265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624 +4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640 +1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656 +5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672 +2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688 + 383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704 + 98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720 + 523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736 + 710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752 +5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768 + 379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784 +1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800 + 585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816 + 690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832 +5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848 +1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864 + 544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880 +3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896 +4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912 +3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928 + 279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944 + 610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960 +1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976 +4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992 +3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008 +3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024 +2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040 +5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056 +3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072 +5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088 +1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104 +2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120 +1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136 + 78,3829,3830, 
267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152 +1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168 +4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184 +3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200 + 534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216 + 165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232 + 626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248 +2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264 +5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280 +1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296 +2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312 +1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328 +1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344 +5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360 +5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376 +5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392 +3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408 +4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424 +4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440 +2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456 +5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472 +3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488 + 598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504 +5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520 +5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536 +1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552 +2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568 +3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584 +4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600 +5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616 +3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632 +4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648 +1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664 +1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680 +4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696 +1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712 + 240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728 +1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744 +1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760 +3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776 + 619,5202,3154,3294, 
215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792 +5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808 +2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824 +1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840 +1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856 +5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872 + 829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888 +4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904 + 375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920 +2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936 + 444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952 +1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968 +1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984 + 730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000 +4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016 +4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032 +1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048 +3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064 +5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080 +5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096 +1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112 +2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128 +1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144 +3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160 +2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176 +3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192 +2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208 +4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224 +4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240 +3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256 + 97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272 +3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288 + 424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304 +3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320 +4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336 +3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352 +1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368 +5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384 + 199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400 +5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416 +1702,1226, 102,1547, 
62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432 + 391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448 +4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464 +4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480 + 397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496 +2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512 +2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528 +3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544 +1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560 +4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576 +2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592 +1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608 +1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624 +2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640 +3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656 +1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672 +5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688 +1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704 +4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720 +1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736 + 135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752 +1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768 +4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784 +4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800 +2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816 +1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832 +4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848 + 660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864 +5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880 +2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896 +3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912 +4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928 + 790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944 +5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960 +5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976 +1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992 +4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008 +4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024 +2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040 +3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056 
+3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072 +2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088 +1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104 +4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120 +3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136 +3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152 +2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168 +4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184 +5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200 +3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216 +2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232 +3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248 +1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264 +2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280 +3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296 +4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312 +2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328 +2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344 +5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360 +1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376 +2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392 +1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408 +3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424 +4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440 +2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456 +3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472 +3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488 +2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504 +4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520 +2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536 +3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552 +4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568 +5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584 +3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600 + 194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616 +1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632 +4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648 +1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664 +4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680 +5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696 + 
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712 +5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728 +5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744 +2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760 +3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776 +2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792 +2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808 + 681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824 +1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840 +4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856 +3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872 +3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888 + 838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904 +2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920 + 625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936 +2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952 +4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968 +1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984 +4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000 +1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016 +3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032 + 574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048 +3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064 +5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080 +5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096 +3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112 +3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128 +1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144 +2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160 +5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176 +1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192 +1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208 +3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224 + 919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240 +1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256 +4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272 +5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288 +2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304 +3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320 + 516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 
4336 +1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352 +2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368 +2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384 +5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400 +5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416 +5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432 +2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448 +2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464 +1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480 +4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496 +3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512 +3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528 +4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544 +4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560 +2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576 +2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592 +5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608 +4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624 +5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640 +4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656 + 502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672 + 121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688 +1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704 +3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720 +4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736 +1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752 +5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768 +2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784 +2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800 +3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816 +5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832 +1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848 +3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864 +5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880 +1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896 +5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912 +2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928 +3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944 +2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960 +3931, 533,4417,3660,2184, 
 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976 +3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992 +3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008 +4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024 + 803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040 +2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056 +4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072 +3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088 +5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104 +1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120 +5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136 + 425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152 +1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168 + 479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184 +4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200 +1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216 +4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232 +1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248 + 433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264 +3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280 +4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296 +5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312 + 938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328 +3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344 + 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 +2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512 +# Everything below is of no interest for detection purposes +2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392 +2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408 +5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424 +5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440 +5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456 +5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472 +5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488 +5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504 +5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520 +5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536 +5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552 +5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568 +5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584 
+5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600 +6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616 +6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632 +6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648 +6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664 +6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680 +6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696 +6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712 +6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728 +6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744 +6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760 +6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776 +6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792 +6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808 +6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824 +6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840 +6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856 +6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872 +6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888 +6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904 +6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920 +6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936 +6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952 +6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968 +6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984 +6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000 +6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016 +6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032 +6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048 +6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064 +6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080 +6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096 +6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112 +6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128 +6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144 +6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160 +6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176 +6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192 +6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208 +6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 
6224 +6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240 +6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256 +3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272 +6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288 +6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304 +3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320 +6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336 +6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352 +6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368 +6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384 +6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400 +6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416 +6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432 +4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448 +6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464 +6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480 +3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496 +6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512 +6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528 +6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544 +6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560 +6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576 +6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592 +6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608 +6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624 +6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640 +6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656 +6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672 +7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688 +7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704 +7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720 +7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736 +7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752 +7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768 +7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784 +7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800 +7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816 +7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832 +7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848 
+7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864 +7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880 +7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896 +7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912 +7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928 +7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944 +7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960 +7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976 +7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992 +7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008 +7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024 +7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040 +7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056 +7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072 +7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088 +7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104 +7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120 +7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136 +7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152 +7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168 +7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184 +7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200 +7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216 +7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232 +7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248 +7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264 +7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280 +7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296 +7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312 +7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328 +7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344 +7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360 +7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376 +7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392 +7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408 +7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424 +7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440 +3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456 +7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472 +7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 
7488 +7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504 +7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520 +4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536 +7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552 +7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568 +7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584 +7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600 +7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616 +7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632 +7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648 +7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664 +7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680 +7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696 +7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712 +8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728 +8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744 +8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760 +8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776 +8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792 +8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808 +8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824 +8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840 +8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856 +8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872 +8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888 +8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904 +8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920 +8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936 +8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952 +8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968 +8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984 +8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000 +8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016 +8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032 +8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048 +8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064 +8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080 +8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096 +8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112 
+8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128 +8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144 +8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160 +8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176 +8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192 +8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208 +8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224 +8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240 +8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256 +8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272 +8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288 +8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304 +8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320 +8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336 +8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352 +8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368 +8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384 +8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400 +8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416 +8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432 +8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448 +8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464 +8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480 +8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496 +8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512 +8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528 +8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544 +8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560 +8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576 +8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592 +8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608 +8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624 +8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640 +8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656 +8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672 +8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688 +4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704 +8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720 +8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736 +8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 
8752 +8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768 +9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784 +9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800 +9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816 +9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832 +9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848 +9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864 +9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880 +9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896 +9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912 +9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928 +9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944 +9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960 +9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976 +9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992 +9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008 +9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024 +9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040 +9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056 +9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072 +9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088 +9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104 +9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120 +9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136 +9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152 +9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168 +9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184 +9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200 +9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216 +9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232 +9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248 +9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264 +9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280 +9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296 +9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312 +9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328 +9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344 +9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360 +9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376 
+3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392 +9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408 +9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424 +9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440 +4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456 +9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472 +9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488 +9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504 +9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520 +9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536 +9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552 +9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568 +9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584 +9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600 +9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616 +9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632 +9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648 +9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664 +9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680 +9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696 +9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712 +9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728 +9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744 +9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760 +9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776 +9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792 +9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808 +9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824 +10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840 +10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856 +10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872 +10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888 +10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904 +10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920 +10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936 +10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952 +10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968 +4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984 
+10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000 +10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016 +10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032 +10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048 +10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064 +10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080 +10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096 +10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112 +4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128 +10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144 +10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160 +10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176 +10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192 +10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208 +10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224 +10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240 +10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256 +10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272 +10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288 +10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304 +10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320 +10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336 +10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352 +10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368 +10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384 +10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400 +4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416 +10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432 +10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448 +10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464 +10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480 +10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496 +10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512 +10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528 
+10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544 +10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560 +10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576 +10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592 +10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608 +10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624 +10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640 +10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656 +10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672 +10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688 +10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704 +10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720 +10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736 +10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752 +10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768 +10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784 +10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800 +10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816 +10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832 +10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848 +10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864 +10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880 +10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896 +11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912 +11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928 +11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944 +4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960 +11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976 +11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992 +11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008 +11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024 +11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040 +11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056 +11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072 
+11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088 +11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104 +11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120 +11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136 +11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152 +11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168 +11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184 +11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200 +11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216 +11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232 +11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248 +11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264 +11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280 +11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296 +11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312 +11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328 +11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344 +11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360 +11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376 +11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392 +11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408 +11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424 +11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440 +11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456 +11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472 +4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488 +11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504 +11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520 +11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536 +11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552 +11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568 +11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584 +11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600 +11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616 
+11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632 +11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648 +11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664 +11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680 +11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696 +11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712 +11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728 +11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744 +11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760 +11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776 +11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792 +11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808 +11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824 +11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840 +11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856 +11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872 +11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888 +11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904 +11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920 +11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936 +12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952 +12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968 +12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984 +12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000 +12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016 +12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032 +12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048 +12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064 +12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080 +12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096 +12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112 +12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128 +12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144 +12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160 
+12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176 +4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192 +4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208 +4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224 +12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240 +12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256 +12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272 +12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288 +12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304 +12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320 +12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336 +12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352 +12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368 +12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384 +12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400 +12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416 +12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432 +12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448 +12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464 +12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480 +12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496 +12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512 +12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528 +12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544 +12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560 +12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576 +12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592 +12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608 +12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624 +12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640 +12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656 +12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672 +12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688 +12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704 
+12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720 +12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736 +12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752 +12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768 +12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784 +12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800 +12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816 +12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832 +12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848 +12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864 +12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880 +12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896 +12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912 +12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928 +12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944 +12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960 +12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976 +4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992 +13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008 +13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024 +13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040 +13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056 +13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072 +13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088 +13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104 +4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120 +13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136 +13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152 +13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168 +13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184 +13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200 +13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216 +13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232 +13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248 
+13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264 +13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280 +13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296 +13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312 +13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328 +13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344 +13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360 +5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376 +13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392 +13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408 +13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424 +13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440 +13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456 +13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472 +13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488 +13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504 +13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520 +13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536 +13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552 +13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568 +13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584 +13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600 +13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616 +13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632 +13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648 +13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664 +13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680 +13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696 +13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712 +13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728 +13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744 +13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760 +13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776 +13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, 
#13792 +13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808 +13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824 +13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840 +13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856 +13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872 +13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888 +13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904 +13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920 +13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936 +13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952 +13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968 +13968,13969,13970,13971,13972) #13973 diff --git a/fanficdownloader/chardet/big5prober.py b/fanficdownloader/chardet/big5prober.py new file mode 100644 index 00000000..e6b52aad --- /dev/null +++ b/fanficdownloader/chardet/big5prober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import Big5DistributionAnalysis +from mbcssm import Big5SMModel + +class Big5Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(Big5SMModel) + self._mDistributionAnalyzer = Big5DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "Big5" diff --git a/fanficdownloader/chardet/chardistribution.py b/fanficdownloader/chardet/chardistribution.py new file mode 100644 index 00000000..b8933418 --- /dev/null +++ b/fanficdownloader/chardet/chardistribution.py @@ -0,0 +1,200 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. 
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants
+from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO
+from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO
+from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO
+from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO
+from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO
+
+ENOUGH_DATA_THRESHOLD = 1024
+SURE_YES = 0.99
+SURE_NO = 0.01
+
+class CharDistributionAnalysis:
+    def __init__(self):
+        self._mCharToFreqOrder = None # Mapping table to get frequency order from char order (get from GetOrder())
+        self._mTableSize = None # Size of above table
+        self._mTypicalDistributionRatio = None # This is a constant value which varies from language to language, used in calculating confidence. See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail.
+        self.reset()
+
+    def reset(self):
+        """reset analyser, clear any state"""
+        self._mDone = constants.False # If this flag is set to constants.True, detection is done and a conclusion has been made
+        self._mTotalChars = 0 # Total characters encountered
+        self._mFreqChars = 0 # The number of characters whose frequency order is less than 512
+
+    def feed(self, aStr, aCharLen):
+        """feed a character with known length"""
+        if aCharLen == 2:
+            # we only care about 2-byte characters in our distribution analysis
+            order = self.get_order(aStr)
+        else:
+            order = -1
+        if order >= 0:
+            self._mTotalChars += 1
+            # order is valid
+            if order < self._mTableSize:
+                if self._mCharToFreqOrder[order] < 512:
+                    self._mFreqChars += 1
+
+    def get_confidence(self):
+        """return confidence based on existing data"""
+        # if we didn't receive any character in our consideration range, return negative answer
+        if self._mTotalChars <= 0:
+            return SURE_NO
+
+        if self._mTotalChars != self._mFreqChars:
+            r = self._mFreqChars / ((self._mTotalChars - self._mFreqChars) * self._mTypicalDistributionRatio)
+            if r < SURE_YES:
+                return r
+
+        # normalize confidence (we don't want to be 100% sure)
+        return SURE_YES
+
+    def got_enough_data(self):
+        # It is not necessary to receive all data to draw a conclusion. For charset detection,
+        # a certain amount of data is enough
+        return self._mTotalChars > ENOUGH_DATA_THRESHOLD
+
+    def get_order(self, aStr):
+        # We do not handle characters based on the original encoding string, but
+        # convert this encoding string to a number, here called order.
+        # This allows multiple encodings of a language to share one frequency table.
+        return -1
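+
+# Worked example (illustrative figures, not from the original source): with
+# the EUC-KR analyser below, whose typical distribution ratio is 6.0, a sample
+# in which 900 of 1000 two-byte characters rank inside the 512 most frequent
+# would give r = 900 / ((1000 - 900) * 6.0) = 1.5, which get_confidence()
+# caps at SURE_YES = 0.99.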
+
+class EUCTWDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = EUCTWCharToFreqOrder
+        self._mTableSize = EUCTW_TABLE_SIZE
+        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for euc-TW encoding, we are interested in
+        #   first byte range: 0xc4 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        if aStr[0] >= '\xC4':
+            return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
+
+class EUCKRDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = EUCKRCharToFreqOrder
+        self._mTableSize = EUCKR_TABLE_SIZE
+        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for euc-KR encoding, we are interested in
+        #   first byte range: 0xb0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        if aStr[0] >= '\xB0':
+            return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
+
+class GB2312DistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = GB2312CharToFreqOrder
+        self._mTableSize = GB2312_TABLE_SIZE
+        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for GB2312 encoding, we are interested in
+        #   first byte range: 0xb0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):
+            return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
+
+class Big5DistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = Big5CharToFreqOrder
+        self._mTableSize = BIG5_TABLE_SIZE
+        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for big5 encoding, we are interested in
+        #   first byte range: 0xa4 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        if aStr[0] >= '\xA4':
+            if aStr[1] >= '\xA1':
+                return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63
+            else:
+                return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40
+        else:
+            return -1
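+
+# Worked examples (illustrative, not from the original source): for EUC-KR,
+# the byte pair '\xb0\xa1' maps to order 94 * (0xB0 - 0xB0) + (0xA1 - 0xA1) = 0,
+# the first slot of EUCKRCharToFreqOrder; for Big5, '\xa4\xa1' takes the
+# high second-byte branch and maps to 157 * 0 + (0xA1 - 0xA1) + 63 = 63.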
+
+class SJISDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for sjis encoding, we are interested in
+        #   first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
+        # no validation needed here. State machine has done that
+        if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'):
+            order = 188 * (ord(aStr[0]) - 0x81)
+        elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'):
+            order = 188 * (ord(aStr[0]) - 0xE0 + 31)
+        else:
+            return -1
+        order = order + ord(aStr[1]) - 0x40
+        if aStr[1] > '\x7F':
+            order = -1
+        return order
+
+class EUCJPDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aStr):
+        # for euc-JP encoding, we are interested in
+        #   first byte range: 0xa0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        if aStr[0] >= '\xA0':
+            return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xA1
+        else:
+            return -1
diff --git a/fanficdownloader/chardet/charsetgroupprober.py b/fanficdownloader/chardet/charsetgroupprober.py
new file mode 100644
index 00000000..51880694
--- /dev/null
+++ b/fanficdownloader/chardet/charsetgroupprober.py
@@ -0,0 +1,96 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from charsetprober import CharSetProber
+
+class CharSetGroupProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mActiveNum = 0
+        self._mProbers = []
+        self._mBestGuessProber = None
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mActiveNum = 0
+        for prober in self._mProbers:
+            if prober:
+                prober.reset()
+                prober.active = constants.True
+                self._mActiveNum += 1
+        self._mBestGuessProber = None
+
+    def get_charset_name(self):
+        if not self._mBestGuessProber:
+            self.get_confidence()
+            if not self._mBestGuessProber: return None
+#            self._mBestGuessProber = self._mProbers[0]
+        return self._mBestGuessProber.get_charset_name()
+
+    def feed(self, aBuf):
+        for prober in self._mProbers:
+            if not prober: continue
+            if not prober.active: continue
+            st = prober.feed(aBuf)
+            if not st: continue
+            if st == constants.eFoundIt:
+                self._mBestGuessProber = prober
+                return self.get_state()
+            elif st == constants.eNotMe:
+                prober.active = constants.False
+                self._mActiveNum -= 1
+                if self._mActiveNum <= 0:
+                    self._mState = constants.eNotMe
+                    return self.get_state()
+        return self.get_state()
+
+    def get_confidence(self):
+        st = self.get_state()
+        if st == constants.eFoundIt:
+            return 0.99
+        elif st == constants.eNotMe:
+            return 0.01
+        bestConf = 0.0
+        self._mBestGuessProber = None
+        for prober in self._mProbers:
+            if not prober: continue
+            if not prober.active:
+                if constants._debug:
+                    sys.stderr.write(prober.get_charset_name() + ' not active\n')
+                continue
+            cf = prober.get_confidence()
+            if constants._debug:
+                sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
+            if bestConf < cf:
+                bestConf = cf
+                self._mBestGuessProber = prober
+        if not self._mBestGuessProber: return 0.0
+        return bestConf
+#        else:
+#            self._mBestGuessProber = self._mProbers[0]
+#            return self._mBestGuessProber.get_confidence()
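+
+# Usage sketch (illustrative, not part of the original module): concrete
+# groups such as chardet's MBCSGroupProber populate self._mProbers in their
+# __init__ and then rely on the feed/get_confidence loop above, e.g.:
+#
+#     group = MBCSGroupProber()
+#     group.feed(some_byte_string)
+#     print group.get_charset_name(), group.get_confidence()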
diff --git a/fanficdownloader/chardet/charsetprober.py b/fanficdownloader/chardet/charsetprober.py
new file mode 100644
index 00000000..3ac1683c
--- /dev/null
+++ b/fanficdownloader/chardet/charsetprober.py
@@ -0,0 +1,60 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, re
+
+class CharSetProber:
+    def __init__(self):
+        pass
+
+    def reset(self):
+        self._mState = constants.eDetecting
+
+    def get_charset_name(self):
+        return None
+
+    def feed(self, aBuf):
+        pass
+
+    def get_state(self):
+        return self._mState
+
+    def get_confidence(self):
+        return 0.0
+
+    def filter_high_bit_only(self, aBuf):
+        aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
+        return aBuf
+
+    def filter_without_english_letters(self, aBuf):
+        aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
+        return aBuf
+
+    def filter_with_english_letters(self, aBuf):
+        # TODO
+        return aBuf
diff --git a/fanficdownloader/chardet/codingstatemachine.py b/fanficdownloader/chardet/codingstatemachine.py
new file mode 100644
index 00000000..452d3b0a
--- /dev/null
+++ b/fanficdownloader/chardet/codingstatemachine.py
@@ -0,0 +1,56 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from constants import eStart, eError, eItsMe
+
+class CodingStateMachine:
+    def __init__(self, sm):
+        self._mModel = sm
+        self._mCurrentBytePos = 0
+        self._mCurrentCharLen = 0
+        self.reset()
+
+    def reset(self):
+        self._mCurrentState = eStart
+
+    def next_state(self, c):
+        # for each byte we get its class
+        # if it is the first byte, we also get the char length
+        byteCls = self._mModel['classTable'][ord(c)]
+        if self._mCurrentState == eStart:
+            self._mCurrentBytePos = 0
+            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
+        # from the byte's class and the stateTable, we get its next state
+        self._mCurrentState = self._mModel['stateTable'][self._mCurrentState * self._mModel['classFactor'] + byteCls]
+        self._mCurrentBytePos += 1
+        return self._mCurrentState
+
+    def get_current_charlen(self):
+        return self._mCurrentCharLen
+
+    def get_coding_state_machine(self):
+        return self._mModel['name']
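+
+# Worked example (illustrative, not part of the original module): with the
+# HZ-GB-2312 model defined later in escsm.py (classFactor 6), a machine in
+# eStart fed '~' (class 2) moves to stateTable[0 * 6 + 2] = 3, and a
+# following '{' (class 4) moves to stateTable[3 * 6 + 4] = 4.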
diff --git a/fanficdownloader/chardet/constants.py b/fanficdownloader/chardet/constants.py
new file mode 100644
index 00000000..e94e226b
--- /dev/null
+++ b/fanficdownloader/chardet/constants.py
@@ -0,0 +1,47 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+_debug = 0
+
+eDetecting = 0
+eFoundIt = 1
+eNotMe = 2
+
+eStart = 0
+eError = 1
+eItsMe = 2
+
+SHORTCUT_THRESHOLD = 0.95
+
+import __builtin__
+if not hasattr(__builtin__, 'False'):
+    False = 0
+    True = 1
+else:
+    False = __builtin__.False
+    True = __builtin__.True
diff --git a/fanficdownloader/chardet/escprober.py b/fanficdownloader/chardet/escprober.py
new file mode 100644
index 00000000..572ed7be
--- /dev/null
+++ b/fanficdownloader/chardet/escprober.py
@@ -0,0 +1,79 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel +from charsetprober import CharSetProber +from codingstatemachine import CodingStateMachine + +class EscCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = [ \ + CodingStateMachine(HZSMModel), + CodingStateMachine(ISO2022CNSMModel), + CodingStateMachine(ISO2022JPSMModel), + CodingStateMachine(ISO2022KRSMModel) + ] + self.reset() + + def reset(self): + CharSetProber.reset(self) + for codingSM in self._mCodingSM: + if not codingSM: continue + codingSM.active = constants.True + codingSM.reset() + self._mActiveSM = len(self._mCodingSM) + self._mDetectedCharset = None + + def get_charset_name(self): + return self._mDetectedCharset + + def get_confidence(self): + if self._mDetectedCharset: + return 0.99 + else: + return 0.00 + + def feed(self, aBuf): + for c in aBuf: + for codingSM in self._mCodingSM: + if not codingSM: continue + if not codingSM.active: continue + codingState = codingSM.next_state(c) + if codingState == constants.eError: + codingSM.active = constants.False + self._mActiveSM -= 1 + if self._mActiveSM <= 0: + self._mState = constants.eNotMe + return self.get_state() + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + self._mDetectedCharset = codingSM.get_coding_state_machine() + return self.get_state() + + return self.get_state() diff --git a/fanficdownloader/chardet/escsm.py b/fanficdownloader/chardet/escsm.py new file mode 100644 index 00000000..9fa22952 --- /dev/null +++ b/fanficdownloader/chardet/escsm.py @@ -0,0 +1,240 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from constants import eStart, eError, eItsMe + +HZ_cls = ( \ +1,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,0,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,4,0,5,2,0, # 78 - 7f +1,1,1,1,1,1,1,1, # 80 - 87 +1,1,1,1,1,1,1,1, # 88 - 8f +1,1,1,1,1,1,1,1, # 90 - 97 +1,1,1,1,1,1,1,1, # 98 - 9f +1,1,1,1,1,1,1,1, # a0 - a7 +1,1,1,1,1,1,1,1, # a8 - af +1,1,1,1,1,1,1,1, # b0 - b7 +1,1,1,1,1,1,1,1, # b8 - bf +1,1,1,1,1,1,1,1, # c0 - c7 +1,1,1,1,1,1,1,1, # c8 - cf +1,1,1,1,1,1,1,1, # d0 - d7 +1,1,1,1,1,1,1,1, # d8 - df +1,1,1,1,1,1,1,1, # e0 - e7 +1,1,1,1,1,1,1,1, # e8 - ef +1,1,1,1,1,1,1,1, # f0 - f7 +1,1,1,1,1,1,1,1, # f8 - ff +) + +HZ_st = ( \ +eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 + 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f + 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 + 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +) + +HZCharLenTable = (0, 0, 0, 0, 0, 0) + +HZSMModel = {'classTable': HZ_cls, + 'classFactor': 6, + 'stateTable': HZ_st, + 'charLenTable': HZCharLenTable, + 'name': "HZ-GB-2312"} + +ISO2022CN_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,3,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,4,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022CN_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 + 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 +eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +) + +ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CNSMModel = {'classTable': ISO2022CN_cls, + 'classFactor': 9, + 'stateTable': ISO2022CN_st, + 'charLenTable': ISO2022CNCharLenTable, + 'name': "ISO-2022-CN"} + +ISO2022JP_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,2,2, # 08 - 0f 
+0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,7,0,0,0, # 20 - 27 +3,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +6,0,4,0,8,0,0,0, # 40 - 47 +0,9,5,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022JP_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f +eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 +eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f +eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f +eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +) + +ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JPSMModel = {'classTable': ISO2022JP_cls, + 'classFactor': 10, + 'stateTable': ISO2022JP_st, + 'charLenTable': ISO2022JPCharLenTable, + 'name': "ISO-2022-JP"} + +ISO2022KR_cls = ( \ +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,3,0,0,0, # 20 - 27 +0,4,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,5,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022KR_st = ( \ +eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 +eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f +eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +) + +ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) + +ISO2022KRSMModel = {'classTable': ISO2022KR_cls, + 'classFactor': 6, + 'stateTable': ISO2022KR_st, + 'charLenTable': ISO2022KRCharLenTable, + 'name': "ISO-2022-KR"} diff --git a/fanficdownloader/chardet/eucjpprober.py b/fanficdownloader/chardet/eucjpprober.py new file mode 100644 index 00000000..46a8b38b --- /dev/null +++ b/fanficdownloader/chardet/eucjpprober.py @@ -0,0 +1,85 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. 
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from constants import eStart, eError, eItsMe
+from mbcharsetprober import MultiByteCharSetProber
+from codingstatemachine import CodingStateMachine
+from chardistribution import EUCJPDistributionAnalysis
+from jpcntx import EUCJPContextAnalysis
+from mbcssm import EUCJPSMModel
+
+class EUCJPProber(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
+        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
+        self._mContextAnalyzer = EUCJPContextAnalysis()
+        self.reset()
+
+    def reset(self):
+        MultiByteCharSetProber.reset(self)
+        self._mContextAnalyzer.reset()
+
+    def get_charset_name(self):
+        return "EUC-JP"
+
+    def feed(self, aBuf):
+        aLen = len(aBuf)
+        for i in range(aLen):
+            codingState = self._mCodingSM.next_state(aBuf[i])
+            if codingState == eError:
+                if constants._debug:
+                    sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
+                self._mState = constants.eNotMe
+                break
+            elif codingState == eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == eStart:
+                charLen = self._mCodingSM.get_current_charlen()
+                if i == 0:
+                    self._mLastChar[1] = aBuf[0]
+                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
+                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                else:
+                    self._mContextAnalyzer.feed(aBuf[i-1:i+1], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)
+
+        self._mLastChar[0] = aBuf[aLen - 1]
+
+        if self.get_state() == constants.eDetecting:
+            if self._mContextAnalyzer.got_enough_data() and \
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        contxtCf = self._mContextAnalyzer.get_confidence()
+        distribCf = self._mDistributionAnalyzer.get_confidence()
+        return max(contxtCf, distribCf)
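+
+# Usage sketch (illustrative, not part of the original module): the prober
+# consumes raw bytes and can be polled after each feed, e.g.:
+#
+#     prober = EUCJPProber()
+#     prober.feed(some_byte_string)
+#     if prober.get_state() == constants.eFoundIt:
+#         print prober.get_charset_name(), prober.get_confidence()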
diff --git a/fanficdownloader/chardet/euckrfreq.py b/fanficdownloader/chardet/euckrfreq.py
new file mode 100644
index 00000000..1463fa1d
--- /dev/null
+++ b/fanficdownloader/chardet/euckrfreq.py
@@ -0,0 +1,594 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# Sampling from about 20M text materials, including literature and computer technology
+
+# 128 --> 0.79
+# 256 --> 0.92
+# 512 --> 0.986
+# 1024 --> 0.99944
+# 2048 --> 0.99999
+#
+# Ideal Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
+# Random Distribution Ratio = 512 / (2350-512) = 0.279.
+#
+# Typical Distribution Ratio
+
+EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
+
+EUCKR_TABLE_SIZE = 2352
+
+# Char to FreqOrder table
+EUCKRCharToFreqOrder = ( \
+  13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
+1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
+1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
+ 945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
+ 116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
+ 708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
+1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
+ 344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
+ 709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
+1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
+1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
+1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
+1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
+1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
+ 885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
+1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
+1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
+1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
+1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
+ 544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
+1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
+ 119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
+ 893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
+1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
+ 282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
+1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
+ 127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
+ 0, 886,1274, 122, 575, 260,
908, 938,1890,1275, 410, 316,1891,1892, 100,1893, +1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317, +1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841, +1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910, +1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610, + 269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375, +1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939, + 887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870, + 217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934, +1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888, +1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950, +1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065, +1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002, +1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965, +1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467, + 50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285, + 639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7, + 103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979, +1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985, + 818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994, +1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250, + 423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824, + 532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003, +2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745, + 619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61, + 191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023, +2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032, +2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912, +2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224, + 719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012, + 819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050, +2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681, + 499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414, +1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068, +2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075, +1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850, +2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606, +2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449, +1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452, + 949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112, +2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121, +2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130, + 22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274, + 962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139, 
+2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721, +1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298, +2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463, +2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747, +2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285, +2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187, +2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10, +2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350, +1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201, +2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972, +2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219, +2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233, +2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242, +2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247, +1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178, +1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255, +2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259, +1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262, +2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702, +1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273, + 295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541, +2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117, + 432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187, +2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800, + 808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312, +2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229, +2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315, + 501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484, +2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170, +1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335, + 425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601, +1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395, +2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354, +1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476, +2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035, + 416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498, +2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310, +1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389, +2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504, +1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505, +2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145, +1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624, + 593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700, +2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 
608,2426,2427,2428,2429, 221, +2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377, + 644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448, + 915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485, +1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705, +1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465, + 291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471, +2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997, +2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486, + 797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494, + 434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771, + 585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323, +2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491, + 95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510, + 161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519, +2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532, +2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199, + 704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544, +2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247, +1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441, + 249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562, +2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362, +2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583, +2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465, + 3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431, + 202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151, + 974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596, +2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406, +2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611, +2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619, +1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, +2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, + 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 +#Everything below is of no interest for detection purpose +2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, +2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674, +2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690, +2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704, +2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720, +2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734, +2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750, +2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765, +2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779, +2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793, 
+2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809, +2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824, +2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840, +2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856, +1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869, +2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883, +2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899, +2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915, +2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331, +2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945, +2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961, +2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976, +2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992, +2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008, +3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021, +3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037, +3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052, +3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066, +3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080, +3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095, +3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110, +3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124, +3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140, +3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156, +3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172, +3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187, +3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201, +3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217, +3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233, +3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248, +3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264, +3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279, +3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295, +3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311, +3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327, +3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343, +3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359, +3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374, +3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389, +3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405, +3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338, +3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432, +3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446, 
+3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191, +3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471, +3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486, +1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499, +1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513, +3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525, +3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541, +3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557, +3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573, +3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587, +3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603, +3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618, +3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632, +3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648, +3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663, +3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679, +3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695, +3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583, +1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722, +3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738, +3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753, +3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767, +3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782, +3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796, +3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810, +3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591, +1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836, +3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851, +3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866, +3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880, +3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895, +1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905, +3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921, +3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934, +3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603, +3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964, +3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978, +3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993, +3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009, +4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024, +4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040, +1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055, +4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069, 
+4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083, +4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098, +4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113, +4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610, +4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142, +4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157, +4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173, +4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189, +4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205, +4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220, +4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234, +4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249, +4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265, +4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279, +4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294, +4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310, +4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326, +4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341, +4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357, +4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371, +4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387, +4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403, +4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418, +4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432, +4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446, +4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461, +4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476, +4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491, +4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507, +4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623, +4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536, +4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551, +4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567, +4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581, +4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627, +4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611, +4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626, +4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642, +4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657, +4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672, +4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687, +1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700, +4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715, 
+4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731, +4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633, +4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758, +4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773, +4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788, +4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803, +4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817, +4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832, +4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847, +4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863, +4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879, +4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893, +4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909, +4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923, +4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938, +4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954, +4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970, +4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645, +4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999, +5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078, +5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028, +1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042, +5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056, +5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072, +5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087, +5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103, +5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118, +1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132, +5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148, +5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161, +5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177, +5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192, +5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206, +1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218, +5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234, +5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249, +5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262, +5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278, +5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293, +5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308, +5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323, +5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338, +5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353, 
+5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369, +5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385, +5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400, +5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415, +5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430, +5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445, +5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461, +5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477, +5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491, +5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507, +5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523, +5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539, +5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554, +5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570, +1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585, +5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600, +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615, +5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631, +5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646, +5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660, +1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673, +5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688, +5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703, +5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716, +5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729, +5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744, +1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758, +5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773, +1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786, +5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801, +5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815, +5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831, +5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847, +5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862, +5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876, +5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889, +5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905, +5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687, +5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951, +5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963, +5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979, +5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993, 
+5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009, +6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025, +6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039, +6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055, +6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071, +6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086, +6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102, +6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118, +6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133, +6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147, +6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163, +6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179, +6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194, +6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210, +6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225, +6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241, +6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256, +6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024 +6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287, +6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699, +6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317, +6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333, +6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347, +6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363, +6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379, +6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395, +6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411, +6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425, +6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440, +6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456, +6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472, +6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488, +6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266, +6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519, +6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535, +6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551, +1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565, +6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581, +6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597, +6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613, +6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629, +6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644, +1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659, 
+6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674, +1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689, +6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705, +6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721, +6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736, +1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748, +6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763, +6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779, +6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794, +6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711, +6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825, +6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840, +6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856, +6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872, +6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888, +6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903, +6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918, +6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934, +6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950, +6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966, +6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981, +6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996, +6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011, +7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027, +7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042, +7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058, +7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074, +7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090, +7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106, +7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122, +7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138, +7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154, +7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170, +7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186, +7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202, +7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216, +7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232, +7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248, +7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264, +7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280, +7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296, +7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312, +7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327, 
+7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343, +7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359, +7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375, +7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391, +7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407, +7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423, +7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439, +7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455, +7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471, +7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487, +7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503, +7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519, +7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535, +7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551, +7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, +7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583, +7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599, +7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615, +7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631, +7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647, +7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663, +7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679, +7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695, +7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711, +7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727, +7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743, +7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759, +7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775, +7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791, +7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807, +7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823, +7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839, +7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855, +7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871, +7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887, +7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903, +7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919, +7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, +7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, +7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, +7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, +7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, +8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, 
+8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, +8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, +8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, +8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, +8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, +8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, +8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, +8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, +8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, +8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, +8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, +8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, +8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, +8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, +8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, +8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, +8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287, +8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303, +8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319, +8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335, +8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351, +8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367, +8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383, +8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399, +8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415, +8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431, +8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447, +8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463, +8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479, +8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495, +8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511, +8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527, +8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543, +8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559, +8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575, +8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591, +8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607, +8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623, +8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639, +8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655, +8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671, +8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687, +8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, 
+8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, +8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, +8736,8737,8738,8739,8740,8741) diff --git a/fanficdownloader/chardet/euckrprober.py b/fanficdownloader/chardet/euckrprober.py new file mode 100644 index 00000000..bd697ebf --- /dev/null +++ b/fanficdownloader/chardet/euckrprober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCKRDistributionAnalysis +from mbcssm import EUCKRSMModel + +class EUCKRProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCKRSMModel) + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-KR" diff --git a/fanficdownloader/chardet/euctwfreq.py b/fanficdownloader/chardet/euctwfreq.py new file mode 100644 index 00000000..c0572095 --- /dev/null +++ b/fanficdownloader/chardet/euctwfreq.py @@ -0,0 +1,426 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# EUCTW frequency table
+# Converted from big5 work
+# by Taiwan's Mandarin Promotion Council
+# <http://www.edu.tw:81/mandr/>
+
+# 128 --> 0.42261
+# 256 --> 0.57851
+# 512 --> 0.74851
+# 1024 --> 0.89384
+# 2048 --> 0.97583
+#
+# Ideal Distribution Ratio = 0.74851/(1-0.74851) = 2.98
+# Random Distribution Ratio = 512/(5401-512) = 0.105
+#
+# Typical Distribution Ratio is about 25% of the ideal one, still much higher than RDR
+
+EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
+
+# Char to FreqOrder table
+EUCTW_TABLE_SIZE = 8102
+
+EUCTWCharToFreqOrder = ( \
+ 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
+3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
+1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
+ 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
+3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
+4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
+7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
+ 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
+ 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
+ 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
+2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
+1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
+3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
+ 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
+1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
+3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
+2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
+ 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
+3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
+1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
+7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
+ 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
+7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
+1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
+ 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
+ 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
+3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
+3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
+ 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
+2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
+2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
+ 
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 + 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 +3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 +1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 +1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 +1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 +2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 + 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 +4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 +1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 +7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 +2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 + 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 + 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 + 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 + 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 +7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 + 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 +1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 + 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 + 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 +7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 +1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 + 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 +3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 +4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 +3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 + 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 + 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 +1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 +4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 +3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 +3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 +2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 +7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 +3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 +7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 +1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 +2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 +1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 + 
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 +1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 +4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 +3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 + 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 + 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 + 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 +2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 +7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 +1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 +2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 +1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 +1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 +7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 +7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 +7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 +3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 +4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 +1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 +7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 +2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 +7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 +3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 +3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 +7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 +2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 +7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 + 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 +4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 +2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 +7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 +3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 +2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 +2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 + 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 +2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 +1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 +1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 +2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 +1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 
+7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 +7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 +2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 +4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 +1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 +7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 + 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 +4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 + 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 +2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 + 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 +1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 +1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 + 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 +3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 +3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 +1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 +3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 +7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 +7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 +1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 +2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 +1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 +3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 +2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 +3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 +2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 +4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 +4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 +3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 + 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 +3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 + 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 +3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 +3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 +3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 +1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 +7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 + 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 +7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 
+1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 + 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 +4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 +3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 + 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 +2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 +2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 +3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 +1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 +4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 +2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 +1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 +1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 +2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 +3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 +1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 +7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 +1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 +4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 +1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 + 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 +1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 +3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 +3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 +2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 +1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 +4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 + 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 +7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 +2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 +3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 +4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 + 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 +7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 +7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 +1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 +4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 +3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 +2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 +3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 
+3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 +2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 +1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 +4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 +3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 +3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 +2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 +4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 +7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 +3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 +2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 +3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 +1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 +2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 +3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 +4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 +2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 +2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 +7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 +1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 +2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 +1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 +3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 +4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 +2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 +3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 +3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 +2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 +4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 +2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 +3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 +4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 +7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 +3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 + 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 +1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 +4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 +1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 +4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 +7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 + 
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 +7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 +2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 +1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 +1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 +3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 + 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 + 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 + 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 +3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 +2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 + 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 +7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 +1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 +3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 +7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 +1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 +7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 +4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 +1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 +2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 +2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 +4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 + 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 + 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 +3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 +3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 +1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 +2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 +7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 +1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 +1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 +3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 + 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 +1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 +4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 +7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 +2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 +3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 + 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 
7062 +1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 +2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 +2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 +7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 +7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 +7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 +2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 +2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 +1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 +4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 +3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 +3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 +4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 +4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 +2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 +2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 +7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 +4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 +7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 +2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 +1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 +3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 +4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 +2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 + 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 +2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 +1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 +2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 +2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 +4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 +7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 +1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 +3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 +7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 +1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 +8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 +2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 +8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 +2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 +2328,3852, 533,4273,3605,2181, 
617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 +8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 +8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 +8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 + 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 +8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 +4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 +3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 +8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 +1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 +8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 + 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 +1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 + 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 +4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 +1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 +4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 +1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 + 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 +3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 +4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 +8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 + 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 +3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 + 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 +2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 +#Everything below is of no interest for detection purpose +2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118 +2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134 +8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150 +8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166 +8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182 +8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198 +8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214 +8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230 +8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246 +8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262 +8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278 +8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294 +8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310 
+8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326 +8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342 +8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358 +8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374 +8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390 +8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406 +8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422 +8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438 +8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454 +8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470 +8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486 +8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502 +8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518 +8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534 +8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550 +8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566 +8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582 +8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598 +8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614 +8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630 +8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646 +8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662 +8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678 +8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694 +8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710 +8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726 +8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742 diff --git a/fanficdownloader/chardet/euctwprober.py b/fanficdownloader/chardet/euctwprober.py new file mode 100644 index 00000000..b073f134 --- /dev/null +++ b/fanficdownloader/chardet/euctwprober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import EUCTWDistributionAnalysis +from mbcssm import EUCTWSMModel + +class EUCTWProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCTWSMModel) + self._mDistributionAnalyzer = EUCTWDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-TW" diff --git a/fanficdownloader/chardet/gb2312freq.py b/fanficdownloader/chardet/gb2312freq.py new file mode 100644 index 00000000..7a4d5a1b --- /dev/null +++ b/fanficdownloader/chardet/gb2312freq.py @@ -0,0 +1,471 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# GB2312 most frequently used character table
+#
+# Char to FreqOrder table, from hz6763
+
+# 512 --> 0.79 -- 0.79
+# 1024 --> 0.92 -- 0.13
+# 2048 --> 0.98 -- 0.06
+# 6768 --> 1.00 -- 0.02
+#
+# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
+# Random Distribution Ratio = 512 / (3755 - 512) = 0.157
+#
+# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
+
+GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
+
+GB2312_TABLE_SIZE = 3760
+
+GB2312CharToFreqOrder = ( \
+1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
+2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
+2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
+ 249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
+1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
+1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
+ 152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
+1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
+2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
+3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
+ 544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
+1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
+ 927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
+2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
+ 360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
+2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
+1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
+3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
+ 198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
+1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
+ 253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
+2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
+1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
+3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
+1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
+2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
+1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
+ 585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
+3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
+3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
+ 252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
+3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
+ 836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
+1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 
110,4549,2066, 648, +3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992, +2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233, +1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157, + 755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807, +1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094, +4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258, + 887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478, +3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152, +3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909, + 509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272, +1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221, +2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252, +1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301, +1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254, + 389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070, +3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461, +3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360, +4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124, + 296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535, +3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243, +1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713, +1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071, +4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442, + 215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946, + 814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257, +3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180, +1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427, + 602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781, +1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724, +2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937, + 930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943, + 432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789, + 396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552, +3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246, +4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451, +3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310, + 750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860, +2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297, +2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780, +2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745, + 776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936, +2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032, + 968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657, + 163,2167, 290,1209,1622,3378, 
550, 634,2508,2510, 695,2634,2384,2512,1476,1414, + 220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976, +3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436, +2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254, +2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536, +1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238, + 18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059, +2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741, + 90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447, + 286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601, +1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269, +1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894, + 915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173, + 681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994, +1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956, +2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437, +3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154, +2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240, +2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143, +2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634, +3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472, +1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541, +1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143, +2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312, +1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414, +3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754, +1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424, +1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302, +3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739, + 795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004, +2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484, +1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739, +4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535, +1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641, +1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307, +3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573, +1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533, + 47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965, + 504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99, +1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280, + 160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505, +1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012, +1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039, + 744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 
892,2481,1623,4077, 982, +3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530, +4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392, +3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656, +2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220, +2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766, +1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535, +3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728, +2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338, +1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627, +1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885, + 125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411, +2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671, +2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162, +3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774, +4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524, +3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346, + 180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040, +3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188, +2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280, +1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131, + 259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947, + 774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970, +3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814, +4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557, +2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997, +1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972, +1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369, + 766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376, +1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480, +3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610, + 955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128, + 642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769, +1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207, + 57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392, +1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623, + 193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782, +2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650, + 158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478, +2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773, +2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007, +1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323, +1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598, +2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961, + 819,1541, 142,2284, 
44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302, +1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409, +1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683, +2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191, +2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616, +3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302, +1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774, +4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147, + 571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731, + 845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464, +3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377, +1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315, + 470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557, +3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903, +1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060, +4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261, +1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092, +2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810, +1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708, + 498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658, +1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871, +3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503, + 448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229, +2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112, + 136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504, +1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389, +1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27, +1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542, +3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861, +2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845, +3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700, +3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469, +3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582, + 996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999, +2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274, + 786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020, +2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601, + 12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628, +1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31, + 475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668, + 233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778, +1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169, +3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667, +3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 
63,2076, 314,1881, +1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276, +1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320, +3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751, +2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432, +2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772, +1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843, +3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116, + 451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904, +4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652, +1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664, +2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770, +3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283, +3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626, +1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713, + 768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333, + 391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062, +2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555, + 931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014, +1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510, + 386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015, +1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459, +1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390, +1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238, +1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, +1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, + 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, + 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512 +#Everything below is of no interest for detection purpose +5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636, +5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874, +5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278, +3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806, +4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827, +5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512, +5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578, +4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828, +4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105, +4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189, +4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561, +3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226, +6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778, +4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039, +6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404, 
+4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213, +4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739, +4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328, +5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592, +3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424, +4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270, +3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232, +4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456, +4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121, +6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971, +6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409, +5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519, +4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367, +6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834, +4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460, +5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464, +5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709, +5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906, +6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530, +3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262, +6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920, +4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190, +5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318, +6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538, +6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697, +4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544, +5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016, +4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638, +5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006, +5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071, +4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552, +4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556, +5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432, +4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632, +4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885, +5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336, +4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729, +4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854, +4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332, +5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004, +5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419, +4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293, +3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580, 
+4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339, +6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341, +5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493, +5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046, +4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904, +6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728, +5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350, +6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233, +4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944, +5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413, +5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700, +3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999, +5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694, +6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571, +4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359, +6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178, +4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421, +4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330, +6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855, +3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587, +6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803, +4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791, +3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304, +3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445, +3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506, +4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856, +2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057, +5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777, +4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369, +5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028, +5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914, +5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175, +4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681, +5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534, +4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912, +5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054, +1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336, +3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666, +4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375, +4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113, +6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614, +4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173, +5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197, 
+3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271, +5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423, +5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529, +5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921, +3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837, +5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922, +5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187, +3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382, +5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628, +5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683, +5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053, +6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928, +4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662, +6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663, +4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554, +3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191, +4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013, +5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932, +5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055, +5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829, +3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096, +3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660, +6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199, +6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748, +5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402, +6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957, +6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668, +6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763, +6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407, +6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051, +5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429, +6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791, +6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028, +3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305, +3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159, +4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683, +4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372, +3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514, +5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544, +5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472, +5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716, +5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905, +5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327, 
+4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030, +5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281, +6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224, +5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327, +4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062, +4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354, +6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065, +3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953, +4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681, +4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708, +5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442, +6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387, +6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237, +4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713, +6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547, +5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957, +5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337, +5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074, +5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685, +5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455, +4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722, +5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615, +5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093, +5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989, +5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094, +6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212, +4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967, +5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733, +4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260, +4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864, +6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353, +4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095, +6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287, +3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504, +5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539, +6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750, +6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864, +6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213, +5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573, +6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252, +6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970, +3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703, +5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978, 
+4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767) + diff --git a/fanficdownloader/chardet/gb2312prober.py b/fanficdownloader/chardet/gb2312prober.py new file mode 100644 index 00000000..91eb3925 --- /dev/null +++ b/fanficdownloader/chardet/gb2312prober.py @@ -0,0 +1,41 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import GB2312DistributionAnalysis +from mbcssm import GB2312SMModel + +class GB2312Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(GB2312SMModel) + self._mDistributionAnalyzer = GB2312DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "GB2312" diff --git a/fanficdownloader/chardet/hebrewprober.py b/fanficdownloader/chardet/hebrewprober.py new file mode 100644 index 00000000..a2b1eaa9 --- /dev/null +++ b/fanficdownloader/chardet/hebrewprober.py @@ -0,0 +1,269 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetprober import CharSetProber +import constants + +# This prober doesn't actually recognize a language or a charset. 
+# It is a helper prober for the use of the Hebrew model probers
+
+### General ideas of the Hebrew charset recognition ###
+#
+# Four main charsets exist in Hebrew:
+# "ISO-8859-8" - Visual Hebrew
+# "windows-1255" - Logical Hebrew
+# "ISO-8859-8-I" - Logical Hebrew
+# "x-mac-hebrew" - ?? Logical Hebrew ??
+#
+# Both "ISO" charsets use a completely identical set of code points, whereas
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
+# these code points. windows-1255 defines additional characters in the range
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
+# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
+# x-mac-hebrew defines similar additional code points but with a different
+# mapping.
+#
+# As far as an average Hebrew text with no diacritics is concerned, all four
+# charsets are identical with respect to code points. Meaning that for the
+# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
+# (including final letters).
+#
+# The dominant difference between these charsets is their directionality.
+# "Visual" directionality means that the text is ordered as if the renderer is
+# not aware of a BIDI rendering algorithm. The renderer sees the text and
+# draws it from left to right. The text itself when ordered naturally is read
+# backwards. A buffer of Visual Hebrew generally looks like so:
+# "[last word of first line spelled backwards] [whole line ordered backwards
+# and spelled backwards] [first word of first line spelled backwards]
+# [end of line] [last word of second line] ... etc' "
+# adding punctuation marks, numbers and English text to visual text is
+# naturally also "visual" and from left to right.
+#
+# "Logical" directionality means the text is ordered "naturally" according to
+# the order it is read. It is the responsibility of the renderer to display
+# the text from right to left. A BIDI algorithm is used to place general
+# punctuation marks, numbers and English text in the text.
+#
+# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
+# what little evidence I could find, it seems that its general directionality
+# is Logical.
+#
+# To sum up all of the above, the Hebrew probing mechanism knows about two
+# charsets:
+# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
+# backwards while line order is natural. For charset recognition purposes
+# the line order is unimportant (In fact, for this implementation, even
+# word order is unimportant).
+# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
+#
+# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
+# specifically identified.
+# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
+# that contains special punctuation marks or diacritics is displayed with
+# some unconverted characters showing as question marks. This problem might
+# be corrected using another model prober for x-mac-hebrew. Due to the fact
+# that x-mac-hebrew texts are so rare, writing another model prober isn't
+# worth the effort and performance hit.
+#
+#### The Prober ####
+#
+# The prober is divided between two SBCharSetProbers and a HebrewProber,
+# all of which are managed, created, fed data, inquired and deleted by the
+# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
+# fact some kind of Hebrew, Logical or Visual. The final decision about which
+# one it is is made by the HebrewProber by combining final-letter scores
+# with the scores of the two SBCharSetProbers to produce a final answer.
+#
+# The SBCSGroupProber is responsible for stripping the original text of HTML
+# tags, English characters, numbers, low-ASCII punctuation characters, spaces
+# and new lines. It reduces any sequence of such characters to a single space.
+# The buffer fed to each prober in the SBCS group prober is pure text in
+# high-ASCII.
+# The two SBCharSetProbers (model probers) share the same language model:
+# Win1255Model.
+# The first SBCharSetProber uses the model normally as any other
+# SBCharSetProber does, to recognize windows-1255, upon which this model was
+# built. The second SBCharSetProber is told to make the pair-of-letter
+# lookup in the language model backwards. This in practice exactly simulates
+# a visual Hebrew model using the windows-1255 logical Hebrew model.
+#
+# The HebrewProber is not using any language model. All it does is look for
+# final-letter evidence suggesting the text is either logical Hebrew or visual
+# Hebrew. Disjointed from the model probers, the results of the HebrewProber
+# alone are meaningless. HebrewProber always returns 0.00 as confidence
+# since it never identifies a charset by itself. Instead, the pointer to the
+# HebrewProber is passed to the model probers as a helper "Name Prober".
+# When the Group prober receives a positive identification from any prober,
+# it asks for the name of the charset identified. If the prober queried is a
+# Hebrew model prober, the model prober forwards the call to the
+# HebrewProber to make the final decision. In the HebrewProber, the
+# decision is made according to the final-letter scores maintained and both
+# model probers' scores. The answer is returned in the form of the name of the
+# charset identified, either "windows-1255" or "ISO-8859-8".

+# windows-1255 / ISO-8859-8 code points of interest
+FINAL_KAF = '\xea'
+NORMAL_KAF = '\xeb'
+FINAL_MEM = '\xed'
+NORMAL_MEM = '\xee'
+FINAL_NUN = '\xef'
+NORMAL_NUN = '\xf0'
+FINAL_PE = '\xf3'
+NORMAL_PE = '\xf4'
+FINAL_TSADI = '\xf5'
+NORMAL_TSADI = '\xf6'
+
+# Minimum Visual vs Logical final letter score difference.
+# If the difference is below this, don't rely solely on the final letter score distance.
+MIN_FINAL_CHAR_DISTANCE = 5
+
+# Minimum Visual vs Logical model score difference.
+# If the difference is below this, don't rely at all on the model score distance.
+MIN_MODEL_DISTANCE = 0.01
+
+VISUAL_HEBREW_NAME = "ISO-8859-8"
+LOGICAL_HEBREW_NAME = "windows-1255"
+
+class HebrewProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mLogicalProber = None
+        self._mVisualProber = None
+        self.reset()
+
+    def reset(self):
+        self._mFinalCharLogicalScore = 0
+        self._mFinalCharVisualScore = 0
+        # The last two characters seen in the previous buffer,
+        # mPrev and mBeforePrev are initialized to space in order to simulate a word
+        # delimiter at the beginning of the data
+        self._mPrev = ' '
+        self._mBeforePrev = ' '
+        # These probers are owned by the group prober.
+
+    def set_model_probers(self, logicalProber, visualProber):
+        self._mLogicalProber = logicalProber
+        self._mVisualProber = visualProber
+
+    def is_final(self, c):
+        return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI]
+
+    def is_non_final(self, c):
+        # The normal Tsadi is not a good Non-Final letter due to words like
+        # 'lechotet' (to chat) containing an apostrophe after the tsadi. 
+
+    def is_final(self, c):
+        return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI]
+
+    def is_non_final(self, c):
+        # The normal Tsadi is not a good Non-Final letter because of words
+        # like 'lechotet' (to chat), which contain an apostrophe after the
+        # tsadi. This apostrophe is converted to a space in
+        # FilterWithoutEnglishLetters, causing the Non-Final tsadi to appear
+        # at the end of a word even though this is not the case in the
+        # original text.
+        # The letters Pe and Kaf rarely display a related behavior of not
+        # being a good Non-Final letter: words like 'Pop', 'Winamp' and
+        # 'Mubarak', for example, legally end with a Non-Final Pe or Kaf.
+        # However, the benefit of these letters as Non-Final letters
+        # outweighs the damage, since such words are quite rare.
+        return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
+
+    def feed(self, aBuf):
+        # Final-letter analysis for the logical-visual decision.
+        # Look for evidence that the received buffer is either logical Hebrew
+        # or visual Hebrew.
+        # The following cases are checked:
+        # 1) A word longer than 1 letter, ending with a final letter. This is
+        #    an indication that the text is laid out "naturally", since the
+        #    final letter really appears at the end. +1 for the logical score.
+        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
+        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi
+        #    should not end with the Non-Final form of that letter.
+        #    Exceptions to this rule are mentioned above in is_non_final().
+        #    This is an indication that the text is laid out backwards.
+        #    +1 for the visual score.
+        # 3) A word longer than 1 letter, starting with a final letter. Final
+        #    letters should not appear at the beginning of a word. This is an
+        #    indication that the text is laid out backwards. +1 for the
+        #    visual score.
+        #
+        # The visual and logical scores are accumulated throughout the text
+        # and are finally checked against each other in get_charset_name().
+        # No checking for final letters in the middle of words is done, since
+        # that case is not an indication of either Logical or Visual text.
+        #
+        # All 7-bit characters are automatically filtered out (replaced with
+        # spaces) so that word boundary detection works properly.

+        if self.get_state() == constants.eNotMe:
+            # Both model probers say it's not them. No reason to continue.
+            return constants.eNotMe
+
+        aBuf = self.filter_high_bit_only(aBuf)
+
+        for cur in aBuf:
+            if cur == ' ':
+                # We stand on a space - a word just ended
+                if self._mBeforePrev != ' ':
+                    # The next-to-last char was not a space, so self._mPrev
+                    # is not a 1-letter word
+                    if self.is_final(self._mPrev):
+                        # case (1) [-2:not space][-1:final letter][cur:space]
+                        self._mFinalCharLogicalScore += 1
+                    elif self.is_non_final(self._mPrev):
+                        # case (2) [-2:not space][-1:Non-Final letter][cur:space]
+                        self._mFinalCharVisualScore += 1
+            else:
+                # Not standing on a space
+                if (self._mBeforePrev == ' ') and (self.is_final(self._mPrev)) and (cur != ' '):
+                    # case (3) [-2:space][-1:final letter][cur:not space]
+                    self._mFinalCharVisualScore += 1
+            self._mBeforePrev = self._mPrev
+            self._mPrev = cur
+
+        # Forever detecting, till the end of the data or until both model
+        # probers return eNotMe (handled above).
+        return constants.eDetecting
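+
+    # A worked example of the scoring above (illustrative only; 'logical'
+    # and 'visual' stand for the two model probers the group prober
+    # creates). '\xf9\xec\xe5\xed' is the word "shalom" in windows-1255,
+    # ending in FINAL_MEM:
+    #
+    #   prober = HebrewProber()
+    #   prober.set_model_probers(logical, visual)
+    #   prober.feed('\xf9\xec\xe5\xed ')
+    #   # case (1) fires once -> _mFinalCharLogicalScore == 1; with enough
+    #   # such words, get_charset_name() below returns "windows-1255".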
+
+    def get_charset_name(self):
+        # Make the decision: is it Logical or Visual?
+        # If the final-letter score distance is dominant enough, rely on it.
+        finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
+        if finalsub >= MIN_FINAL_CHAR_DISTANCE:
+            return LOGICAL_HEBREW_NAME
+        if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
+            return VISUAL_HEBREW_NAME
+
+        # It's not dominant enough; try to rely on the model scores instead.
+        modelsub = self._mLogicalProber.get_confidence() - self._mVisualProber.get_confidence()
+        if modelsub > MIN_MODEL_DISTANCE:
+            return LOGICAL_HEBREW_NAME
+        if modelsub < -MIN_MODEL_DISTANCE:
+            return VISUAL_HEBREW_NAME
+
+        # Still no good; fall back to the final-letter distance, maybe it
+        # will save the day.
+        if finalsub < 0.0:
+            return VISUAL_HEBREW_NAME
+
+        # (finalsub > 0 - Logical) or (don't know what to do) - default to
+        # Logical.
+        return LOGICAL_HEBREW_NAME
+
+    def get_state(self):
+        # Remain active as long as either of the model probers is active.
+        if (self._mLogicalProber.get_state() == constants.eNotMe) and \
+           (self._mVisualProber.get_state() == constants.eNotMe):
+            return constants.eNotMe
+        return constants.eDetecting
diff --git a/fanficdownloader/chardet/jisfreq.py b/fanficdownloader/chardet/jisfreq.py
new file mode 100644
index 00000000..5fe4a5c3
--- /dev/null
+++ b/fanficdownloader/chardet/jisfreq.py
@@ -0,0 +1,567 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# Sampled from about 20M of text material, including literature and
+# computer technology.
+#
+# Japanese frequency table, applied to both S-JIS and EUC-JP.
+# Characters are listed in frequency order.
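+#
+# (Reading the coverage figures below: the 512 most frequent characters
+# cover 92.635% of the sampled text, which is where the Ideal Distribution
+# Ratio of 0.92635 / (1 - 0.92635) = 12.58 comes from; the typical ratio
+# used for detection is 25% of that, roughly 3.15, rounded to the 3.0
+# constant below.)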
+ +# 128 --> 0.77094 +# 256 --> 0.85710 +# 512 --> 0.92635 +# 1024 --> 0.97130 +# 2048 --> 0.99431 +# +# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 +# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 +# +# Typical Distribution Ratio, 25% of IDR + +JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 + +# Char to FreqOrder table , +JIS_TABLE_SIZE = 4368 + +JISCharToFreqOrder = ( \ + 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 +3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 +1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 +2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64 +2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80 +5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96 +1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112 +5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128 +5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144 +5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160 +5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176 +5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192 +5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208 +1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224 +1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240 +1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256 +2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272 +3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288 +3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304 + 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320 + 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336 +1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352 + 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368 +5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384 + 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400 + 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416 + 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432 + 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448 + 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464 +5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480 +5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496 +5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512 +4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528 +5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544 +5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560 +5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576 +5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592 
+5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608 +5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624 +5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640 +5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656 +5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672 +3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688 +5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704 +5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720 +5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736 +5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752 +5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768 +5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784 +5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800 +5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816 +5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832 +5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848 +5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864 +5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880 +5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896 +5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912 +5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928 +5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944 +5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960 +5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976 +5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992 +5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008 +5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024 +5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040 +5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056 +5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072 +5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088 +5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104 +5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120 +5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136 +5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152 +5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168 +5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184 +5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200 +5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216 +5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232 
+5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248 +5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264 +5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280 +5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296 +6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312 +6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328 +6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344 +6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360 +6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376 +6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392 +6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408 +6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424 +4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440 + 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456 + 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472 +1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488 +1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504 + 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520 +3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536 +3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552 + 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568 +3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584 +3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600 + 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616 +2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632 + 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648 +3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664 +1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680 + 380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696 +1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712 + 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728 +2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744 +2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760 +2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776 +2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792 +1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808 +1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824 +1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840 +1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856 +2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 
1872 +1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888 +2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904 +1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920 +1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936 +1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952 +1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968 +1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984 +1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000 + 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016 + 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032 +1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048 +2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064 +2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080 +2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096 +3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112 +3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128 + 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144 +3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160 +1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176 + 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192 +2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208 +1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224 + 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240 +3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256 +4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272 +2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288 +1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304 +2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320 +1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336 + 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352 + 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368 +1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384 +2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400 +2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416 +2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432 +3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448 +1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464 +2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480 + 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496 + 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, 
# 2512 + 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528 +1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544 +2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560 + 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576 +1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592 +1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608 + 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624 +1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640 +1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656 +1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672 + 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688 +2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704 + 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720 +2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736 +3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752 +2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768 +1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784 +6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800 +1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816 +2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832 +1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848 + 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864 + 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880 +3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896 +3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912 +1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928 +1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944 +1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960 +1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976 + 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992 + 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008 +2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024 + 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040 +3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056 +2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072 + 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088 +1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104 +2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120 + 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136 +1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, 
# 3152 + 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168 +4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184 +2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200 +1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216 + 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232 +1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248 +2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264 + 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280 +6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296 +1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312 +1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328 +2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344 +3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360 + 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376 +3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392 +1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408 + 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424 +1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440 + 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456 +3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472 + 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488 +2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504 + 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520 +4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536 +2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552 +1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568 +1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584 +1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600 + 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616 +1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632 +3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648 +1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664 +3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680 + 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696 + 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712 + 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728 +2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744 +1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760 + 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776 +1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 
904,3618,3537, # 3792 + 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808 +1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824 + 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840 + 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856 + 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872 +1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888 +1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904 +2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920 +4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936 + 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952 +1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968 + 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984 +1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000 +3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016 +1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032 +2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048 +2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064 +1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080 +1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096 +2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112 + 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128 +2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144 +1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160 +1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176 +1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192 +1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208 +3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224 +2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240 +2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256 + 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272 +3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288 +3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304 +1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320 +2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 +1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 +2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 +#Everything below is of no interest for detection purpose +2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384 +6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400 +6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 
4416 +6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432 +6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448 +4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464 +4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480 +3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496 +3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512 +4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528 +3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544 +6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560 +4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576 +6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592 +6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608 +6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624 +6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640 +6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656 +6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672 +3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688 +3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704 +6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720 +2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736 +4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752 +4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768 +4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784 +6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800 +3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816 +4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832 +4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848 +6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864 +4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880 +6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896 +3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912 +2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928 +4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944 +2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960 +6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976 +4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992 +6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008 +6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024 +6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040 
+4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056 +6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072 +2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088 +6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104 +4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120 +6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136 +4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152 +4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168 +6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184 +6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200 +6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216 +3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232 +1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248 +3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264 +3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280 +4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296 +6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312 +3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328 +6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344 +3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360 +3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376 +2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392 +6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408 +6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424 +3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440 +6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456 +3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472 +6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488 +6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504 +6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520 +4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536 +6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552 +4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568 +3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584 +3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600 +6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616 +6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632 +4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648 +6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664 +6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 
5680 +6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696 +6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712 +6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728 +6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744 +4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760 +4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776 +3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792 +6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808 +4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824 +2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840 +6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856 +6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872 +4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888 +2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904 +4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920 +2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936 +4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952 +4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968 +4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984 +6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000 +3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016 +6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032 +3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048 +6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064 +2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080 +3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096 +7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112 +2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128 +3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144 +3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160 +3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176 +3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192 +7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208 +7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224 +7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240 +7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256 +7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272 +4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288 +3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304 
+3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320 +4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336 +3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352 +3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368 +7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384 +4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400 +7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416 +7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432 +7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448 +7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464 +7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480 +4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496 +4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512 +7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528 +3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544 +4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560 +7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576 +7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592 +4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608 +3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624 +3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640 +7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656 +4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672 +4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688 +4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704 +4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720 +4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736 +4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752 +7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768 +7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784 +7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800 +7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816 +7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832 +2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848 +3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864 +7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880 +7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896 +3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912 +4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928 +3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 
6944 +3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960 +2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976 +7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992 +7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008 +4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024 +3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040 +3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056 +7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072 +7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088 +7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104 +4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120 +7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136 +2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152 +3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168 +4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184 +7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200 +4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216 +4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232 +7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248 +7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264 +5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280 +7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296 +7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312 +7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328 +7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344 +7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360 +5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376 +5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392 +7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408 +3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424 +7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440 +7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456 +3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472 +7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488 +7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504 +1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520 +3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536 +4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552 +2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568 
+3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584 +2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600 +5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616 +4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632 +4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648 +5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664 +7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680 +7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696 +7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712 +7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728 +3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744 +7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760 +3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776 +7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792 +4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808 +7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824 +7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840 +7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856 +7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872 +7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888 +7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904 +7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920 +7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936 +7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952 +7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968 +7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984 +7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000 +8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016 +8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032 +8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048 +8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064 +8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080 +8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096 +8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112 +8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128 +8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144 +8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160 +8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176 +8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192 +8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 
8208 +8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224 +8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240 +8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256 +8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272 diff --git a/fanficdownloader/chardet/jpcntx.py b/fanficdownloader/chardet/jpcntx.py new file mode 100644 index 00000000..93db4a9c --- /dev/null +++ b/fanficdownloader/chardet/jpcntx.py @@ -0,0 +1,210 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +NUM_OF_CATEGORY = 6 +DONT_KNOW = -1 +ENOUGH_REL_THRESHOLD = 100 +MAX_REL_THRESHOLD = 1000 +MINIMUM_DATA_THRESHOLD = 4 + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( \ +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), 
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), 
+(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), 
+(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), 
+(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
+(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
+(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
+(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
+(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
+(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
+(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
+(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
+(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
+(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
+(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
+)
+
+class JapaneseContextAnalysis:
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self._mTotalRel = 0 # total number of sequences received
+        self._mRelSample = [0] * NUM_OF_CATEGORY # category counters; each integer counts sequences in its category
+        self._mNeedToSkipCharNum = 0 # if the last byte in the current buffer is not the last byte of a character, this records how many bytes to skip in the next buffer
+        self._mLastCharOrder = -1 # the order of the previous character
+        self._mDone = constants.False # if this flag is set to constants.True, detection is done and a conclusion has been made
+
+    def feed(self, aBuf, aLen):
+        if self._mDone: return
+
+        # The buffer we get is byte oriented, and a character may span more than
+        # one buffer. If the previous buffer ended with an incomplete character,
+        # we recorded how many bytes were needed to complete it, and we skip
+        # those bytes here. We could buffer them and analyse the character once
+        # it is complete, but one character makes little difference, and simply
+        # skipping it simplifies the logic and improves performance.
+        i = self._mNeedToSkipCharNum
+        while i < aLen:
+            order, charLen = self.get_order(aBuf[i:i+2])
+            i += charLen
+            if i > aLen:
+                self._mNeedToSkipCharNum = i - aLen
+                self._mLastCharOrder = -1
+            else:
+                if (order != -1) and (self._mLastCharOrder != -1):
+                    self._mTotalRel += 1
+                    if self._mTotalRel > MAX_REL_THRESHOLD:
+                        self._mDone = constants.True
+                        break
+                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
+                self._mLastCharOrder = order
+
+    def got_enough_data(self):
+        return self._mTotalRel > ENOUGH_REL_THRESHOLD
+
+    def get_confidence(self):
+        # This is just one way to calculate confidence. It works well for me.
+        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
+            # force float division; Python 2 integer division would truncate the ratio to 0
+            return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
+        else:
+            return DONT_KNOW
+
+    def get_order(self, aStr):
+        return -1, 1
+
+class SJISContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, aStr):
+        if not aStr: return -1, 1
+        # find out the current character's byte length
+        if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \
+           ((aStr[0] >= '\xE0') and (aStr[0] <= '\xFC')):
+            charLen = 2
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana (lead byte 0x82)
+        if len(aStr) > 1:
+            if (aStr[0] == '\x82') and \
+               (aStr[1] >= '\x9F') and \
+               (aStr[1] <= '\xF1'):
+                return ord(aStr[1]) - 0x9F, charLen
+
+        return -1, charLen
+
+class EUCJPContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, aStr):
+        if not aStr: return -1, 1
+        # find out the current character's byte length
+        if (aStr[0] == '\x8E') or \
+           ((aStr[0] >= '\xA1') and (aStr[0] <= '\xFE')):
+            charLen = 2
+        elif aStr[0] == '\x8F':
+            charLen = 3
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana (lead byte 0xA4)
+        if len(aStr) > 1:
+            if (aStr[0] == '\xA4') and \
+               (aStr[1] >= '\xA1') and \
+               (aStr[1] <= '\xF3'):
+                return ord(aStr[1]) - 0xA1, charLen
+
+        return -1, charLen
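For orientation, the hiragana order lookup above is easy to exercise on its own. The following sketch is a minimal standalone rendition of SJISContextAnalysis.get_order, not part of the patch; the helper name sjis_hiragana_order is invented here for illustration.

# Minimal standalone sketch of the Shift-JIS order lookup above.
# sjis_hiragana_order is a hypothetical name, not a chardet API.
def sjis_hiragana_order(pair):
    # Lead bytes 0x81-0x9F and 0xE0-0xFC open two-byte Shift-JIS characters.
    if '\x81' <= pair[0] <= '\x9F' or '\xE0' <= pair[0] <= '\xFC':
        charLen = 2
    else:
        charLen = 1
    # Hiragana occupies lead byte 0x82 with trail bytes 0x9F-0xF1.
    if len(pair) > 1 and pair[0] == '\x82' and '\x9F' <= pair[1] <= '\xF1':
        return ord(pair[1]) - 0x9F, charLen
    return -1, charLen

print sjis_hiragana_order('\x82\xA0')  # hiragana "a": (1, 2)
print sjis_hiragana_order('\x97\x60')  # two-byte non-hiragana pair: (-1, 2)

feed() then increments _mRelSample[jp2CharContext[prev][cur]] once per consecutive pair of hiragana orders, and get_confidence reports the fraction of observed pairs that land outside the "unseen" category 0.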
diff --git a/fanficdownloader/chardet/langbulgarianmodel.py b/fanficdownloader/chardet/langbulgarianmodel.py
new file mode 100644
index 00000000..bf5641e7
--- /dev/null
+++ b/fanficdownloader/chardet/langbulgarianmodel.py
@@ -0,0 +1,228 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants
+
+# 255: Control characters that usually do not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to a word
+# 252: 0 - 9
+
+# Character Mapping Table:
+# this table is modified based on win1251BulgarianCharToOrderMap, so
+# only numbers below 64 are known to be valid
+
+Latin5_BulgarianCharToOrderMap = ( \
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
+194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
+210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
+ 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
+  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13, # d0
+  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
+ 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
+)
+
+win1251BulgarianCharToOrderMap = ( \
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
+206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
+221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
+ 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
+ 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
+  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13, # e0
+  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 96.9392%
+# first 1024 sequences: 3.0618%
+# rest sequences: 0.2992%
+# negative sequences: 0.0020%
+BulgarianLangModel = ( \
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
+3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
+0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
+0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, +3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, +2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, +2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, 
+2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { \ + 'charToOrderMap': Latin5_BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-5" +} + +Win1251BulgarianModel = { \ + 'charToOrderMap': win1251BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1251" +} diff --git a/fanficdownloader/chardet/langcyrillicmodel.py b/fanficdownloader/chardet/langcyrillicmodel.py new file mode 100644 index 00000000..e604cc73 --- /dev/null +++ b/fanficdownloader/chardet/langcyrillicmodel.py @@ -0,0 +1,329 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# KOI8-R language model +# Character Mapping Table: +KOI8R_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 
70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 
68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, 
+0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { \ + 'charToOrderMap': KOI8R_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "KOI8-R" +} + +Win1251CyrillicModel = { \ + 'charToOrderMap': win1251_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1251" +} + +Latin5CyrillicModel = { \ + 'charToOrderMap': latin5_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "ISO-8859-5" +} + +MacCyrillicModel = { \ + 'charToOrderMap': macCyrillic_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "MacCyrillic" +}; + +Ibm866Model = { \ + 'charToOrderMap': IBM866_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "IBM866" +} + +Ibm855Model = { \ + 'charToOrderMap': IBM855_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': constants.False, + 'charsetName': "IBM855" +} diff --git a/fanficdownloader/chardet/langgreekmodel.py b/fanficdownloader/chardet/langgreekmodel.py new file mode 100644 index 00000000..ec6d49e8 --- /dev/null +++ b/fanficdownloader/chardet/langgreekmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative 
sequences: 0.0148% +GreekLangModel = ( \ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, 
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
+0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
+0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
+0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
+0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
+0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
+0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+)
+
+Latin7GreekModel = { \
+  'charToOrderMap': Latin7_CharToOrderMap,
+  'precedenceMatrix': GreekLangModel,
+  'mTypicalPositiveRatio': 0.982851,
+  'keepEnglishLetter': constants.False,
+  'charsetName': "ISO-8859-7"
+}
+
+Win1253GreekModel = { \
+  'charToOrderMap': win1253_CharToOrderMap,
+  'precedenceMatrix': GreekLangModel,
+  'mTypicalPositiveRatio': 0.982851,
+  'keepEnglishLetter': constants.False,
+  'charsetName': "windows-1253"
+}
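All of the model dictionaries in this patch share one shape: a 256-entry charToOrderMap keyed by byte value, and a flattened 64x64 precedenceMatrix of pair-frequency categories. The sketch below is a rough, standalone illustration of how a single-byte prober consumes them; pair_category is a hypothetical helper, and the indexing assumes the 64-order sample size that chardet's SingleByteCharSetProber appears to use.

# Hypothetical helper illustrating how charToOrderMap and
# precedenceMatrix fit together; assumes a 64-order sample size.
SAMPLE_SIZE = 64

def pair_category(model, prev_byte, cur_byte):
    # Translate raw byte values into frequency orders.
    prev_order = model['charToOrderMap'][prev_byte]
    cur_order = model['charToOrderMap'][cur_byte]
    # Only orders below 64 (the frequent letters) index the matrix;
    # 252-255 mark digits, symbols, CR/LF and control characters.
    if prev_order < SAMPLE_SIZE and cur_order < SAMPLE_SIZE:
        return model['precedenceMatrix'][prev_order * SAMPLE_SIZE + cur_order]
    return None

# In windows-1253, 0xE1 and 0xE2 are alpha and beta (orders 1 and 29);
# under this model the lookup should land in a high-frequency category.
print pair_category(Win1253GreekModel, 0xE1, 0xE2)

The prober tallies these categories over the whole input and weighs the share of frequent pairs against mTypicalPositiveRatio to produce a confidence for the charset.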
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +win1255_CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, + 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest sequences: 0.087% +# negative sequences: 0.0015% +HebrewLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, 
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 
+3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { \ + 'charToOrderMap': win1255_CharToOrderMap, + 'precedenceMatrix': HebrewLangModel, + 'mTypicalPositiveRatio': 0.984004, + 'keepEnglishLetter': constants.False, + 'charsetName': "windows-1255" +} diff --git 
a/fanficdownloader/chardet/langhungarianmodel.py b/fanficdownloader/chardet/langhungarianmodel.py new file mode 100644 index 00000000..d635f03c --- /dev/null +++ b/fanficdownloader/chardet/langhungarianmodel.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest sequences: 0.8894% +# negative sequences: 0.0009% +HungarianLangModel = ( \ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, 
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, 
+2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { \ + 'charToOrderMap': Latin2_HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': constants.True, + 'charsetName': "ISO-8859-2" +} + +Win1250HungarianModel = { \ + 'charToOrderMap': win1250HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': constants.True, + 'charsetName': "windows-1250" +} diff --git a/fanficdownloader/chardet/langthaimodel.py b/fanficdownloader/chardet/langthaimodel.py new file mode 100644 index 00000000..96ec054f --- /dev/null +++ b/fanficdownloader/chardet/langthaimodel.py @@ -0,0 +1,200 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). + +# Character Mapping Table: +TIS620CharToOrderMap = ( \ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( \ +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, 
+3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, 
+0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { \ + 'charToOrderMap': TIS620CharToOrderMap, + 'precedenceMatrix': ThaiLangModel, + 'mTypicalPositiveRatio': 0.926386, + 'keepEnglishLetter': constants.False, + 
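+    # (A note on the two fields above, based on how the single-byte
+    # prober appears to use them: mTypicalPositiveRatio is the share of
+    # category-3 sequences in the training sample, used to normalize the
+    # observed positive ratio into a confidence, and keepEnglishLetter
+    # says whether ASCII letters are kept when the input is filtered
+    # before analysis.)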
'charsetName': "TIS-620" +} diff --git a/fanficdownloader/chardet/latin1prober.py b/fanficdownloader/chardet/latin1prober.py new file mode 100644 index 00000000..b46129ba --- /dev/null +++ b/fanficdownloader/chardet/latin1prober.py @@ -0,0 +1,136 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetprober import CharSetProber +import constants +import operator + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +CLASS_NUM = 8 # total classes + +Latin1_CharToClass = ( \ + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 + OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F + UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 + OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF + ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF + ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 + ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF + ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 + ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF +) + +# 0 : 
illegal
+# 1 : very unlikely
+# 2 : normal
+# 3 : very likely
+Latin1ClassModel = ( \
+# UDF OTH ASC ASS ACV ACO ASV ASO
+  0,  0,  0,  0,  0,  0,  0,  0,  # UDF
+  0,  3,  3,  3,  3,  3,  3,  3,  # OTH
+  0,  3,  3,  3,  3,  3,  3,  3,  # ASC
+  0,  3,  3,  3,  1,  1,  3,  3,  # ASS
+  0,  3,  3,  3,  1,  2,  1,  2,  # ACV
+  0,  3,  3,  3,  3,  3,  3,  3,  # ACO
+  0,  3,  1,  3,  1,  1,  1,  3,  # ASV
+  0,  3,  1,  3,  1,  1,  3,  3,  # ASO
+)
+
+class Latin1Prober(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self.reset()
+
+    def reset(self):
+        self._mLastCharClass = OTH
+        self._mFreqCounter = [0] * FREQ_CAT_NUM
+        CharSetProber.reset(self)
+
+    def get_charset_name(self):
+        return "windows-1252"
+
+    def feed(self, aBuf):
+        aBuf = self.filter_with_english_letters(aBuf)
+        for c in aBuf:
+            charClass = Latin1_CharToClass[ord(c)]
+            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]
+            if freq == 0:
+                self._mState = constants.eNotMe
+                break
+            self._mFreqCounter[freq] += 1
+            self._mLastCharClass = charClass
+
+        return self.get_state()
+
+    def get_confidence(self):
+        if self.get_state() == constants.eNotMe:
+            return 0.01
+
+        total = reduce(operator.add, self._mFreqCounter)
+        if total < 0.01:
+            confidence = 0.0
+        else:
+            # Multiply by 1.0 to force float division under Python 2;
+            # with plain integer division the "very likely" term would
+            # almost always floor to zero.
+            confidence = (self._mFreqCounter[3] * 1.0 / total) - (self._mFreqCounter[1] * 20.0 / total)
+            if confidence < 0.0:
+                confidence = 0.0
+        # Lower the confidence of Latin-1 so that other, more accurate
+        # detectors can take priority.
+        confidence = confidence * 0.5
+        return confidence
diff --git a/fanficdownloader/chardet/mbcharsetprober.py b/fanficdownloader/chardet/mbcharsetprober.py
new file mode 100644
index 00000000..a8131445
--- /dev/null
+++ b/fanficdownloader/chardet/mbcharsetprober.py
@@ -0,0 +1,82 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+# Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from constants import eStart, eError, eItsMe
+from charsetprober import CharSetProber
+
+class MultiByteCharSetProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        # Subclasses plug in a coding state machine (a byte-sequence
+        # validator) and a character distribution analyzer.
+        self._mDistributionAnalyzer = None
+        self._mCodingSM = None
+        self._mLastChar = ['\x00', '\x00']
+
+    def reset(self):
+        CharSetProber.reset(self)
+        if self._mCodingSM:
+            self._mCodingSM.reset()
+        if self._mDistributionAnalyzer:
+            self._mDistributionAnalyzer.reset()
+        self._mLastChar = ['\x00', '\x00']
+
+    def get_charset_name(self):
+        pass
+
+    def feed(self, aBuf):
+        aLen = len(aBuf)
+        for i in range(0, aLen):
+            # Run every byte through the state machine: an illegal
+            # sequence rules this encoding out, an unambiguous one
+            # confirms it.
+            codingState = self._mCodingSM.next_state(aBuf[i])
+            if codingState == eError:
+                if constants._debug:
+                    sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
+                self._mState = constants.eNotMe
+                break
+            elif codingState == eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == eStart:
+                # A complete character has been consumed; feed the
+                # two-byte window ending here to the distribution
+                # analyzer.
+                charLen = self._mCodingSM.get_current_charlen()
+                if i == 0:
+                    self._mLastChar[1] = aBuf[0]
+                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                else:
+                    self._mDistributionAnalyzer.feed(aBuf[i-1:i+1], charLen)
+
+        # Remember the final byte so a character split across feed()
+        # calls can still be analyzed.
+        self._mLastChar[0] = aBuf[aLen - 1]
+
+        if self.get_state() == constants.eDetecting:
+            if self._mDistributionAnalyzer.got_enough_data() and \
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD):
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        return self._mDistributionAnalyzer.get_confidence()
diff --git a/fanficdownloader/chardet/mbcsgroupprober.py b/fanficdownloader/chardet/mbcsgroupprober.py
new file mode 100644
index 00000000..941cc3e3
--- /dev/null
+++ b/fanficdownloader/chardet/mbcsgroupprober.py
@@ -0,0 +1,50 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+# Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from charsetgroupprober import CharSetGroupProber +from utf8prober import UTF8Prober +from sjisprober import SJISProber +from eucjpprober import EUCJPProber +from gb2312prober import GB2312Prober +from euckrprober import EUCKRProber +from big5prober import Big5Prober +from euctwprober import EUCTWProber + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ \ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + Big5Prober(), + EUCTWProber()] + self.reset() diff --git a/fanficdownloader/chardet/mbcssm.py b/fanficdownloader/chardet/mbcssm.py new file mode 100644 index 00000000..e46c1ffe --- /dev/null +++ b/fanficdownloader/chardet/mbcssm.py @@ -0,0 +1,514 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from constants import eStart, eError, eItsMe + +# BIG5 + +BIG5_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 4,4,4,4,4,4,4,4, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 4,3,3,3,3,3,3,3, # a0 - a7 + 3,3,3,3,3,3,3,3, # a8 - af + 3,3,3,3,3,3,3,3, # b0 - b7 + 3,3,3,3,3,3,3,3, # b8 - bf + 3,3,3,3,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0) # f8 - ff + +BIG5_st = ( \ + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart)#10-17 + +Big5CharLenTable = (0, 1, 1, 2, 0) + +Big5SMModel = {'classTable': BIG5_cls, + 'classFactor': 5, + 'stateTable': BIG5_st, + 'charLenTable': Big5CharLenTable, + 'name': 'Big5'} + +# EUC-JP + +EUCJP_cls = ( \ + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,5,5, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,5,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,4,4,4,4,4,4,4, # 30 - 37 + 4,4,4,4,4,4,4,4, # 38 - 3f + 4,4,4,4,4,4,4,4, # 40 - 47 + 4,4,4,4,4,4,4,4, # 48 - 4f + 4,4,4,4,4,4,4,4, # 50 - 57 + 4,4,4,4,4,4,4,4, # 58 - 5f + 4,4,4,4,4,4,4,4, # 60 - 67 + 4,4,4,4,4,4,4,4, # 68 - 6f + 4,4,4,4,4,4,4,4, # 70 - 77 + 4,4,4,4,4,4,4,4, # 78 - 7f + 5,5,5,5,5,5,5,5, # 80 - 87 + 5,5,5,5,5,5,1,3, # 88 - 8f + 5,5,5,5,5,5,5,5, # 90 - 97 + 5,5,5,5,5,5,5,5, # 98 - 9f + 5,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,0,5) # f8 - ff + +EUCJP_st = ( \ + 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 + eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f + 3,eError,eError,eError,eStart,eStart,eStart,eStart)#20-27 + +EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) + +EUCJPSMModel = {'classTable': EUCJP_cls, + 'classFactor': 6, + 'stateTable': EUCJP_st, + 'charLenTable': EUCJPCharLenTable, + 'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f 
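+    # (Each of these *_cls tables maps all 256 byte values to a small
+    # byte class; the class then selects a column in the matching *_st
+    # state table below.)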
+ 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,3,3,3, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,3,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 2,2,2,2,2,2,2,2, # e0 - e7 + 2,2,2,2,2,2,2,2, # e8 - ef + 2,2,2,2,2,2,2,2, # f0 - f7 + 2,2,2,2,2,2,2,0) # f8 - ff + +EUCKR_st = ( + eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 + eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart)#08-0f + +EUCKRCharLenTable = (0, 1, 2, 0) + +EUCKRSMModel = {'classTable': EUCKR_cls, + 'classFactor': 4, + 'stateTable': EUCKR_st, + 'charLenTable': EUCKRCharLenTable, + 'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_cls = ( \ + 2,2,2,2,2,2,2,2, # 00 - 07 + 2,2,2,2,2,2,0,0, # 08 - 0f + 2,2,2,2,2,2,2,2, # 10 - 17 + 2,2,2,0,2,2,2,2, # 18 - 1f + 2,2,2,2,2,2,2,2, # 20 - 27 + 2,2,2,2,2,2,2,2, # 28 - 2f + 2,2,2,2,2,2,2,2, # 30 - 37 + 2,2,2,2,2,2,2,2, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,2, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,6,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,3,4,4,4,4,4,4, # a0 - a7 + 5,5,1,1,1,1,1,1, # a8 - af + 1,1,1,1,1,1,1,1, # b0 - b7 + 1,1,1,1,1,1,1,1, # b8 - bf + 1,1,3,1,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0) # f8 - ff + +EUCTW_st = ( \ + eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 + eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f + 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 + eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f + +EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) + +EUCTWSMModel = {'classTable': EUCTW_cls, + 'classFactor': 7, + 'stateTable': EUCTW_st, + 'charLenTable': EUCTWCharLenTable, + 'name': 'x-euc-tw'} + +# GB2312 + +GB2312_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 3,3,3,3,3,3,3,3, # 30 - 37 + 3,3,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,4, # 78 - 7f + 5,6,6,6,6,6,6,6, # 80 - 87 + 6,6,6,6,6,6,6,6, # 88 - 8f + 6,6,6,6,6,6,6,6, # 90 - 97 + 6,6,6,6,6,6,6,6, # 98 - 9f + 6,6,6,6,6,6,6,6, # a0 - a7 + 6,6,6,6,6,6,6,6, # a8 - af + 6,6,6,6,6,6,6,6, # b0 - b7 + 6,6,6,6,6,6,6,6, # b8 - bf + 6,6,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 6,6,6,6,6,6,6,6, # e0 - e7 + 6,6,6,6,6,6,6,6, # e8 - ef + 6,6,6,6,6,6,6,6, # f0 - f7 + 6,6,6,6,6,6,6,0) # f8 - ff + +GB2312_st = ( \ + eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + 
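+# (In the *_st tables the row is the current parser state and the column
+# is the byte class, so the coding state machine appears to advance
+# roughly as
+#
+#     cls   = classTable[ord(byte)]
+#     state = stateTable[state * classFactor + cls]
+#
+# eStart marks a completed character, eError an impossible byte
+# sequence, and eItsMe a sequence only this encoding can produce.)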
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
+     4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
+eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27
+eError,eError,eStart,eStart,eStart,eStart,eStart,eStart)#28-2f
+
+# To be accurate, the length of class 6 can be either 2 or 4.
+# But it is not necessary to discriminate between the two since
+# it is used for frequency analysis only, and we are validating
+# each code range there as well. So it is safe to set it to 2 here.
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
+
+GB2312SMModel = {'classTable': GB2312_cls,
+                 'classFactor': 7,
+                 'stateTable': GB2312_st,
+                 'charLenTable': GB2312CharLenTable,
+                 'name': 'GB2312'}
+
+# Shift_JIS
+
+SJIS_cls = ( \
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,1,  # 78 - 7f
+    3,3,3,3,3,3,3,3,  # 80 - 87
+    3,3,3,3,3,3,3,3,  # 88 - 8f
+    3,3,3,3,3,3,3,3,  # 90 - 97
+    3,3,3,3,3,3,3,3,  # 98 - 9f
+    # 0xa0 is illegal in sjis encoding, but some pages do contain
+    # such bytes. We need to be more forgiving of errors here.
+    2,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,2,2,2,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,2,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,4,4,4,  # e8 - ef
+    4,4,4,4,4,4,4,4,  # f0 - f7
+    4,4,4,4,4,0,0,0)  # f8 - ff
+
+SJIS_st = ( \
+    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
+    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart)#10-17
+
+SJISCharLenTable = (0, 1, 1, 2, 0, 0)
+
+SJISSMModel = {'classTable': SJIS_cls,
+               'classFactor': 6,
+               'stateTable': SJIS_st,
+               'charLenTable': SJISCharLenTable,
+               'name': 'Shift_JIS'}
+
+# UCS2-BE
+
+UCS2BE_cls = ( \
+    0,0,0,0,0,0,0,0,  # 00 - 07
+    0,0,1,0,0,2,0,0,  # 08 - 0f
+    0,0,0,0,0,0,0,0,  # 10 - 17
+    0,0,0,3,0,0,0,0,  # 18 - 1f
+    0,0,0,0,0,0,0,0,  # 20 - 27
+    0,3,3,3,3,3,0,0,  # 28 - 2f
+    0,0,0,0,0,0,0,0,  # 30 - 37
+    0,0,0,0,0,0,0,0,  # 38 - 3f
+    0,0,0,0,0,0,0,0,  # 40 - 47
+    0,0,0,0,0,0,0,0,  # 48 - 4f
+    0,0,0,0,0,0,0,0,  # 50 - 57
+    0,0,0,0,0,0,0,0,  # 58 - 5f
+    0,0,0,0,0,0,0,0,  # 60 - 67
+    0,0,0,0,0,0,0,0,  # 68 - 6f
+    0,0,0,0,0,0,0,0,  # 70 - 77
+    0,0,0,0,0,0,0,0,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,0,0,0,0,0,0,0,  # a0 - a7
+    0,0,0,0,0,0,0,0,  # a8 - af
+    0,0,0,0,0,0,0,0,  # b0 - b7
+    0,0,0,0,0,0,0,0,  # b8 - bf
+    0,0,0,0,0,0,0,0,  # c0 - c7
+    0,0,0,0,0,0,0,0,  # c8 - cf
+    0,0,0,0,0,0,0,0,  # d0 - d7
+    0,0,0,0,0,0,0,0,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,4,5)  # f8 - ff
+
+UCS2BE_st = ( \
+         5,     7,     7,eError,     4,     3,eError,eError,#00-07
+    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17
+         6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f
+         6,     6,     6,     6,     5,     7,     7,eError,#20-27
+         5,     8,     6,     6,eError,     6,     6,     6,#28-2f
+         6,     6,     6,     6,eError,eError,eStart,eStart)#30-37
+
+UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
+
+UCS2BESMModel = {'classTable':
UCS2BE_cls, + 'classFactor': 6, + 'stateTable': UCS2BE_st, + 'charLenTable': UCS2BECharLenTable, + 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_cls = ( \ + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5) # f8 - ff + +UCS2LE_st = ( \ + 6, 6, 7, 6, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 + 5, 5, 5,eError, 5,eError, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,eError,#20-27 + 5, 5, 5,eError,eError,eError, 5, 5,#28-2f + 5, 5, 5,eError, 5,eError,eStart,eStart)#30-37 + +UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) + +UCS2LESMModel = {'classTable': UCS2LE_cls, + 'classFactor': 6, + 'stateTable': UCS2LE_st, + 'charLenTable': UCS2LECharLenTable, + 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_cls = ( \ + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 2,2,2,2,3,3,3,3, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 5,5,5,5,5,5,5,5, # a0 - a7 + 5,5,5,5,5,5,5,5, # a8 - af + 5,5,5,5,5,5,5,5, # b0 - b7 + 5,5,5,5,5,5,5,5, # b8 - bf + 0,0,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 7,8,8,8,8,8,8,8, # e0 - e7 + 8,8,8,8,8,9,8,8, # e8 - ef + 10,11,11,11,11,11,11,11, # f0 - f7 + 12,13,13,13,14,15,0,0) # f8 - ff + +UTF8_st = ( \ + eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 + 9, 11, 8, 7, 6, 5, 4, 3,#08-0f + eError,eError,eError,eError,eError,eError,eError,eError,#10-17 + eError,eError,eError,eError,eError,eError,eError,eError,#18-1f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f + eError,eError, 5, 5, 5, 5,eError,eError,#30-37 + eError,eError,eError,eError,eError,eError,eError,eError,#38-3f + eError,eError,eError, 5, 5, 5,eError,eError,#40-47 + eError,eError,eError,eError,eError,eError,eError,eError,#48-4f + eError,eError, 7, 7, 7, 7,eError,eError,#50-57 + eError,eError,eError,eError,eError,eError,eError,eError,#58-5f + eError,eError,eError,eError, 7, 7,eError,eError,#60-67 + eError,eError,eError,eError,eError,eError,eError,eError,#68-6f + eError,eError, 9, 9, 9, 9,eError,eError,#70-77 + 
eError,eError,eError,eError,eError,eError,eError,eError,#78-7f + eError,eError,eError,eError,eError, 9,eError,eError,#80-87 + eError,eError,eError,eError,eError,eError,eError,eError,#88-8f + eError,eError, 12, 12, 12, 12,eError,eError,#90-97 + eError,eError,eError,eError,eError,eError,eError,eError,#98-9f + eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 + eError,eError,eError,eError,eError,eError,eError,eError,#a8-af + eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 + eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf + eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 + eError,eError,eError,eError,eError,eError,eError,eError)#c8-cf + +UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8SMModel = {'classTable': UTF8_cls, + 'classFactor': 16, + 'stateTable': UTF8_st, + 'charLenTable': UTF8CharLenTable, + 'name': 'UTF-8'} diff --git a/fanficdownloader/chardet/sbcharsetprober.py b/fanficdownloader/chardet/sbcharsetprober.py new file mode 100644 index 00000000..da071163 --- /dev/null +++ b/fanficdownloader/chardet/sbcharsetprober.py @@ -0,0 +1,106 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from charsetprober import CharSetProber + +SAMPLE_SIZE = 64 +SB_ENOUGH_REL_THRESHOLD = 1024 +POSITIVE_SHORTCUT_THRESHOLD = 0.95 +NEGATIVE_SHORTCUT_THRESHOLD = 0.05 +SYMBOL_CAT_ORDER = 250 +NUMBER_OF_SEQ_CAT = 4 +POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 +#NEGATIVE_CAT = 0 + +class SingleByteCharSetProber(CharSetProber): + def __init__(self, model, reversed=constants.False, nameProber=None): + CharSetProber.__init__(self) + self._mModel = model + self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup + self._mNameProber = nameProber # Optional auxiliary prober for name decision + self.reset() + + def reset(self): + CharSetProber.reset(self) + self._mLastOrder = 255 # char order of last character + self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT + self._mTotalSeqs = 0 + self._mTotalChar = 0 + self._mFreqChar = 0 # characters that fall in our sampling range + + def get_charset_name(self): + if self._mNameProber: + return self._mNameProber.get_charset_name() + else: + return self._mModel['charsetName'] + + def feed(self, aBuf): + if not self._mModel['keepEnglishLetter']: + aBuf = self.filter_without_english_letters(aBuf) + aLen = len(aBuf) + if not aLen: + return self.get_state() + for c in aBuf: + order = self._mModel['charToOrderMap'][ord(c)] + if order < SYMBOL_CAT_ORDER: + self._mTotalChar += 1 + if order < SAMPLE_SIZE: + self._mFreqChar += 1 + if self._mLastOrder < SAMPLE_SIZE: + self._mTotalSeqs += 1 + if not self._mReversed: + self._mSeqCounters[self._mModel['precedenceMatrix'][(self._mLastOrder * SAMPLE_SIZE) + order]] += 1 + else: # reverse the order of the letters in the lookup + self._mSeqCounters[self._mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + self._mLastOrder]] += 1 + self._mLastOrder = order + + if self.get_state() == constants.eDetecting: + if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: + cf = self.get_confidence() + if cf > POSITIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, we have a winner\n' % (self._mModel['charsetName'], cf)) + self._mState = constants.eFoundIt + elif cf < NEGATIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, below negative shortcut threshhold %s\n' % (self._mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD)) + self._mState = constants.eNotMe + + return self.get_state() + + def get_confidence(self): + r = 0.01 + if self._mTotalSeqs > 0: +# print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio'] + r = (1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs / self._mModel['mTypicalPositiveRatio'] +# print r, self._mFreqChar, self._mTotalChar + r = r * self._mFreqChar / self._mTotalChar + if r >= 1.0: + r = 0.99 + return r diff --git a/fanficdownloader/chardet/sbcsgroupprober.py b/fanficdownloader/chardet/sbcsgroupprober.py new file mode 100644 index 00000000..d19160c8 --- /dev/null +++ b/fanficdownloader/chardet/sbcsgroupprober.py @@ -0,0 +1,64 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. 
+# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from charsetgroupprober import CharSetGroupProber +from sbcharsetprober import SingleByteCharSetProber +from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model +from langgreekmodel import Latin7GreekModel, Win1253GreekModel +from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from langthaimodel import TIS620ThaiModel +from langhebrewmodel import Win1255HebrewModel +from hebrewprober import HebrewProber + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ \ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + SingleByteCharSetProber(Latin2HungarianModel), + SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + ] + hebrewProber = HebrewProber() + logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber) + visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber) + hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) + self._mProbers.extend([hebrewProber, logicalHebrewProber, visualHebrewProber]) + + self.reset() diff --git a/fanficdownloader/chardet/sjisprober.py b/fanficdownloader/chardet/sjisprober.py new file mode 100644 index 00000000..fea2690c --- /dev/null +++ b/fanficdownloader/chardet/sjisprober.py @@ -0,0 +1,85 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. 
+# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from mbcharsetprober import MultiByteCharSetProber +from codingstatemachine import CodingStateMachine +from chardistribution import SJISDistributionAnalysis +from jpcntx import SJISContextAnalysis +from mbcssm import SJISSMModel +import constants, sys +from constants import eStart, eError, eItsMe + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(SJISSMModel) + self._mDistributionAnalyzer = SJISDistributionAnalysis() + self._mContextAnalyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "SHIFT_JIS" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n') + self._mState = constants.eNotMe + break + elif codingState == eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar[2 - charLen :], charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i + 1 - charLen : i + 3 - charLen], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if self._mContextAnalyzer.got_enough_data() and \ + (self.get_confidence() > constants.SHORTCUT_THRESHOLD): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/fanficdownloader/chardet/test.py b/fanficdownloader/chardet/test.py new file mode 100644 index 00000000..2ebf3a4d --- /dev/null +++ b/fanficdownloader/chardet/test.py @@ -0,0 +1,20 @@ +import sys, glob +sys.path.insert(0, '..') +from chardet.universaldetector import UniversalDetector + +count = 0 +u = UniversalDetector() +for f in glob.glob(sys.argv[1]): + print f.ljust(60), + u.reset() + for line in file(f, 'rb'): + u.feed(line) + if u.done: break + u.close() + result = u.result + if result['encoding']: + print result['encoding'], 'with confidence', result['confidence'] + else: + print '******** no result' + count += 1 +print count, 'tests' diff --git a/fanficdownloader/chardet/universaldetector.py 
b/fanficdownloader/chardet/universaldetector.py new file mode 100644 index 00000000..809df227 --- /dev/null +++ b/fanficdownloader/chardet/universaldetector.py @@ -0,0 +1,154 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import constants, sys +from latin1prober import Latin1Prober # windows-1252 +from mbcsgroupprober import MBCSGroupProber # multi-byte character sets +from sbcsgroupprober import SBCSGroupProber # single-byte character sets +from escprober import EscCharSetProber # ISO-2122, etc. +import re + +MINIMUM_THRESHOLD = 0.20 +ePureAscii = 0 +eEscAscii = 1 +eHighbyte = 2 + +class UniversalDetector: + def __init__(self): + self._highBitDetector = re.compile(r'[\x80-\xFF]') + self._escDetector = re.compile(r'(\033|~{)') + self._mEscCharSetProber = None + self._mCharSetProbers = [] + self.reset() + + def reset(self): + self.result = {'encoding': None, 'confidence': 0.0} + self.done = constants.False + self._mStart = constants.True + self._mGotData = constants.False + self._mInputState = ePureAscii + self._mLastChar = '' + if self._mEscCharSetProber: + self._mEscCharSetProber.reset() + for prober in self._mCharSetProbers: + prober.reset() + + def feed(self, aBuf): + if self.done: return + + aLen = len(aBuf) + if not aLen: return + + if not self._mGotData: + # If the data starts with BOM, we know it is UTF + if aBuf[:3] == '\xEF\xBB\xBF': + # EF BB BF UTF-8 with BOM + self.result = {'encoding': "UTF-8", 'confidence': 1.0} + elif aBuf[:4] == '\xFF\xFE\x00\x00': + # FF FE 00 00 UTF-32, little-endian BOM + self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} + elif aBuf[:4] == '\x00\x00\xFE\xFF': + # 00 00 FE FF UTF-32, big-endian BOM + self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} + elif aBuf[:4] == '\xFE\xFF\x00\x00': + # FE FF 00 00 UCS-4, unusual octet order BOM (3412) + self.result = {'encoding': "X-ISO-10646-UCS-4-3412", 'confidence': 1.0} + elif aBuf[:4] == '\x00\x00\xFF\xFE': + # 00 00 FF FE UCS-4, unusual octet order BOM (2143) + self.result = {'encoding': "X-ISO-10646-UCS-4-2143", 'confidence': 1.0} + elif aBuf[:2] == '\xFF\xFE': + # FF FE UTF-16, little endian BOM + self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} + elif aBuf[:2] == '\xFE\xFF': + # FE FF UTF-16, big endian BOM + self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} + + self._mGotData = constants.True + if 
self.result['encoding'] and (self.result['confidence'] > 0.0): + self.done = constants.True + return + + if self._mInputState == ePureAscii: + if self._highBitDetector.search(aBuf): + self._mInputState = eHighbyte + elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf): + self._mInputState = eEscAscii + + self._mLastChar = aBuf[-1] + + if self._mInputState == eEscAscii: + if not self._mEscCharSetProber: + self._mEscCharSetProber = EscCharSetProber() + if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), + 'confidence': self._mEscCharSetProber.get_confidence()} + self.done = constants.True + elif self._mInputState == eHighbyte: + if not self._mCharSetProbers: + self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), Latin1Prober()] + for prober in self._mCharSetProbers: + if prober.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': prober.get_charset_name(), + 'confidence': prober.get_confidence()} + self.done = constants.True + break + + def close(self): + if self.done: return + if not self._mGotData: + if constants._debug: + sys.stderr.write('no data received!\n') + return + self.done = constants.True + + if self._mInputState == ePureAscii: + self.result = {'encoding': 'ascii', 'confidence': 1.0} + return self.result + + if self._mInputState == eHighbyte: + proberConfidence = None + maxProberConfidence = 0.0 + maxProber = None + for prober in self._mCharSetProbers: + if not prober: continue + proberConfidence = prober.get_confidence() + if proberConfidence > maxProberConfidence: + maxProberConfidence = proberConfidence + maxProber = prober + if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): + self.result = {'encoding': maxProber.get_charset_name(), + 'confidence': maxProber.get_confidence()} + return self.result + + if constants._debug: + sys.stderr.write('no probers hit minimum threshhold\n') + for prober in self._mCharSetProbers[0].mProbers: + if not prober: continue + sys.stderr.write('%s confidence = %s\n' % \ + (prober.get_charset_name(), \ + prober.get_confidence())) diff --git a/fanficdownloader/chardet/utf8prober.py b/fanficdownloader/chardet/utf8prober.py new file mode 100644 index 00000000..c1792bb3 --- /dev/null +++ b/fanficdownloader/chardet/utf8prober.py @@ -0,0 +1,76 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import constants, sys
+from constants import eStart, eError, eItsMe
+from charsetprober import CharSetProber
+from codingstatemachine import CodingStateMachine
+from mbcssm import UTF8SMModel
+
+ONE_CHAR_PROB = 0.5
+
+class UTF8Prober(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(UTF8SMModel)
+        self.reset()
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mCodingSM.reset()
+        self._mNumOfMBChar = 0
+
+    def get_charset_name(self):
+        return "utf-8"
+
+    def feed(self, aBuf):
+        for c in aBuf:
+            codingState = self._mCodingSM.next_state(c)
+            if codingState == eError:
+                self._mState = constants.eNotMe
+                break
+            elif codingState == eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == eStart:
+                if self._mCodingSM.get_current_charlen() >= 2:
+                    self._mNumOfMBChar += 1
+
+        if self.get_state() == constants.eDetecting:
+            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        unlike = 0.99
+        if self._mNumOfMBChar < 6:
+            for i in range(0, self._mNumOfMBChar):
+                unlike = unlike * ONE_CHAR_PROB
+            return 1.0 - unlike
+        else:
+            return unlike
diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py
new file mode 100644
index 00000000..bc27a82f
--- /dev/null
+++ b/fanficdownloader/configurable.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import ConfigParser
+
+# All of the writers(epub,html,txt) and adapters(ffnet,twlt,etc)
+# inherit from Configurable. The config file(s) use ini format:
+# [sections] with key:value settings.
+#
+# There's a [defaults] section which is overridden by the writer's
+# section [epub], which is overridden by the adapter's section for each
+# site.
+#
+# [defaults]
+# titlepage_entries: category,genre, status
+# [epub]
+# titlepage_entries: category,genre, status,datePublished,dateUpdated,dateCreated
+# [www.whofic.com]
+# titlepage_entries: category,genre, status,dateUpdated,rating
+
+class Configurable(object):
+
+    def __init__(self, config):
+        self.config = config
+        self.sectionslist = ['defaults']
+
+    def addConfigSection(self,section):
+        self.sectionslist.insert(0,section)
+
+    def getConfig(self, key):
+        val = ""
+        for section in self.sectionslist:
+            try:
+                val = self.config.get(section,key)
+                if val and val.lower() == "false":
+                    val = False
+                #print "getConfig(%s)=[%s]%s" % (key,section,val)
+                return val
+            except (ConfigParser.NoOptionError, ConfigParser.NoSectionError), e:
+                pass
+
+        return val
+
+    # split and strip each.
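+    # For example, with the sample ini shown above and sections added via
+    # addConfigSection('epub') then addConfigSection('www.whofic.com'),
+    # sectionslist is ['www.whofic.com', 'epub', 'defaults'], so
+    # getConfigList('titlepage_entries') returns
+    # ['category', 'genre', 'status', 'dateUpdated', 'rating'] -- the
+    # site section wins because it is consulted first.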
+ def getConfigList(self, key): + vlist = self.getConfig(key).split(',') + vlist = [ v.strip() for v in vlist ] + #print "vlist("+key+"):"+str(vlist) + return vlist + diff --git a/fanficdownloader/exceptions.py b/fanficdownloader/exceptions.py new file mode 100644 index 00000000..cf8e558e --- /dev/null +++ b/fanficdownloader/exceptions.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## A few exceptions for different things for adapters + +class FailedToDownload(Exception): + def __init__(self,error): + self.error=error + + def __str__(self): + return self.error + +class InvalidStoryURL(Exception): + def __init__(self,url,domain,example): + self.url=url + self.domain=domain + self.example=example + + def __str__(self): + return "Bad Story URL: (%s) for site: (%s) Example: (%s)" % (self.url, self.domain, self.example) + +class FailedToLogin(Exception): + def __init__(self,url,username): + self.url=url + self.username=username + + def __str__(self): + return "Failed to Login for URL: (%s) with username: (%s)" % (self.url, self.username) + +class AdultCheckRequired(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story requires confirmation of adult status: (%s)" % self.url + +class StoryDoesNotExist(Exception): + def __init__(self,url): + self.url=url + + def __str__(self): + return "Story does not exist: (%s)" % self.url + +class UnknownSite(Exception): + def __init__(self,url,supported_sites_list): + self.url=url + self.supported_sites_list=supported_sites_list + + def __str__(self): + return "Unknown Site(%s). Supported sites: (%s)" % (self.url, ", ".join(self.supported_sites_list)) + diff --git a/fanficdownloader/gziphttp.py b/fanficdownloader/gziphttp.py new file mode 100644 index 00000000..76049eea --- /dev/null +++ b/fanficdownloader/gziphttp.py @@ -0,0 +1,38 @@ +## Borrowed from http://techknack.net/python-urllib2-handlers/ + +import urllib2 +from gzip import GzipFile +from StringIO import StringIO + +class GZipProcessor(urllib2.BaseHandler): + """A handler to add gzip capabilities to urllib2 requests + """ + def http_request(self, req): + req.add_header("Accept-Encoding", "gzip") + return req + https_request = http_request + + def http_response(self, req, resp): + #print("Content-Encoding:%s"%resp.headers.get("Content-Encoding")) + if resp.headers.get("Content-Encoding") == "gzip": + gz = GzipFile( + fileobj=StringIO(resp.read()), + mode="r" + ) +# resp.read = gz.read +# resp.readlines = gz.readlines +# resp.readline = gz.readline +# resp.next = gz.next + old_resp = resp + resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + return resp + https_response = http_response + +# brave new world - 1:30 w/o, 1:10 with? 40 chapters, so 20s from sleeps. 
+# with gzip, no sleep: 47.469 +# w/o gzip, no sleep: 47.736 + +# I Am What I Am 67 chapters +# w/o gzip: 57.168 +# w/ gzip: 40.692 diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py new file mode 100644 index 00000000..e1ca7db5 --- /dev/null +++ b/fanficdownloader/html.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + +import re +import sys +import StringIO +import urllib + +from BeautifulSoup import BeautifulSoup + +class HtmlProcessor: + WHITESPACE_RE = re.compile(r'\s') + # Look for </blockquote <p> + BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE) + + def __init__(self, html, unfill=0): + self.unfill = unfill + html = self._ProcessRawHtml(html) + self._soup = BeautifulSoup(html) + if self._soup.title: + self.title = self._soup.title.contents[0] + else: + self.title = None + + def _ProcessRawHtml(self, html): + new_html, count = HtmlProcessor.BAD_TAG_RE.subn('<', html) + if count: + print >>sys.stderr, 'Replaced %d bad tags' % count + return new_html + + def _StubInternalAnchors(self): + '''Replace each internal anchor with a fixed-size filepos anchor. + + Looks for every anchor with <a href="#myanchor"> and replaces that + with <a filepos="00000000050">. Stores anchors in self._anchor_references''' + self._anchor_references = [] + anchor_num = 0 + # anchor links + anchorlist = self._soup.findAll('a', href=re.compile('^#')) + # treat reference tags like a tags for TOCTOP. + anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#'))) + for anchor in anchorlist: + self._anchor_references.append((anchor_num, anchor['href'])) + del anchor['href'] + anchor['filepos'] = '%.10d' % anchor_num + anchor_num += 1 + + def _ReplaceAnchorStubs(self): + # TODO: Browsers allow extra whitespace in the href names. + # use __str__ instead of prettify--it inserts extra spaces. + assembled_text = self._soup.__str__('utf8') + del self._soup # shouldn't touch this anymore + for anchor_num, original_ref in self._anchor_references: + ref = urllib.unquote(original_ref[1:]) # remove leading '#' + # Find the position of ref in the utf-8 document. + # TODO(chatham): Using regexes and looking for name= would be better. + newpos = assembled_text.rfind(ref.encode('utf-8')) + if newpos == -1: + print >>sys.stderr, 'Could not find anchor "%s"' % original_ref + continue + newpos += len(ref) + 2 # don't point into the middle of the <a name> tag + old_filepos = 'filepos="%.10d"' % anchor_num + new_filepos = 'filepos="%.10d"' % newpos + assert assembled_text.find(old_filepos) != -1 + assembled_text = assembled_text.replace(old_filepos, new_filepos, 1) + return assembled_text + + def _FixPreTags(self): + '''Replace <pre> tags with HTML-ified text.''' + pres = self._soup.findAll('pre') + for pre in pres: + pre.replaceWith(self._FixPreContents(str(pre.contents[0]))) + + def _FixPreContents(self, text): + if self.unfill: + line_splitter = '\n\n' + line_joiner = '<p>' + else: + line_splitter = '\n' + line_joiner = '<br>' + lines = [] + for line in text.split(line_splitter): + lines.append(self.WHITESPACE_RE.subn(' ', line)[0]) + return line_joiner.join(lines) + + def _RemoveUnsupported(self): + '''Remove any tags which the kindle cannot handle.''' + # TODO(chatham): <link> tags to script? 
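+        # Note: extract() removes a tag and all of its children from the
+        # parse tree, so script/style contents never reach the cleaned text.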
+        unsupported_tags = ('script', 'style')
+        for tag_type in unsupported_tags:
+            for element in self._soup.findAll(tag_type):
+                element.extract()
+
+    def RenameAnchors(self, prefix):
+        '''Rename every internal anchor to have the given prefix, then
+        return the contents of the body tag.'''
+        for anchor in self._soup.findAll('a', href=re.compile('^#')):
+            anchor['href'] = '#' + prefix + anchor['href'][1:]
+        for a in self._soup.findAll('a'):
+            if a.get('name'):
+                a['name'] = prefix + a['name']
+
+        # TODO(chatham): figure out how to fix this. sometimes body comes out
+        # as NoneType.
+        content = []
+        if self._soup.body is not None:
+            content = [unicode(c) for c in self._soup.body.contents]
+        return '\n'.join(content)
+
+    def CleanHtml(self):
+        # TODO(chatham): fix_html_br, fix_html
+        self._RemoveUnsupported()
+        self._StubInternalAnchors()
+        self._FixPreTags()
+        return self._ReplaceAnchorStubs()
+
+
+if __name__ == '__main__':
+    FILE = '/tmp/documentation.html'
+    #FILE = '/tmp/multipre.html'
+    FILE = '/tmp/view.html'
+    import codecs
+    d = open(FILE).read()
+    h = HtmlProcessor(d)
+    s = h.CleanHtml()
+    #print s
diff --git a/fanficdownloader/html2text.py b/fanficdownloader/html2text.py
new file mode 100644
index 00000000..ce6e1d3d
--- /dev/null
+++ b/fanficdownloader/html2text.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""html2text: Turn HTML into equivalent Markdown-structured text."""
+__version__ = "2.37"
+__author__ = "Aaron Swartz (me@aaronsw.com)"
+__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3."
+__contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"]
+
+# TODO:
+# Support decoded entities with unifiable.
+
+if not hasattr(__builtins__, 'True'): True, False = 1, 0
+import re, sys, urllib, htmlentitydefs, codecs, StringIO, types
+import sgmllib
+import urlparse
+sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
+
+try: from textwrap import wrap
+except: pass
+
+# Use Unicode characters instead of their ASCII pseudo-replacements
+UNICODE_SNOB = 0
+
+# Put the links after each paragraph instead of at the end.
+LINKS_EACH_PARAGRAPH = 0
+
+# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
+BODY_WIDTH = 78
+
+# Don't show internal links (href="#local-anchor") -- corresponding link targets
+# won't be visible in the plain text file anyway.
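+# Setting this to True keeps the anchor text but omits the [n]
+# reference markup for such links.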
+SKIP_INTERNAL_LINKS = False + +### Entity Nonsense ### + +def name2cp(k): + if k == 'apos': return ord("'") + if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3 + return htmlentitydefs.name2codepoint[k] + else: + k = htmlentitydefs.entitydefs[k] + if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1 + return ord(codecs.latin_1_decode(k)[0]) + +unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', +'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*', +'ndash':'-', 'oelig':'oe', 'aelig':'ae', +'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', +'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', +'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i', +'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', +'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'} + +unifiable_n = {} + +for k in unifiable.keys(): + unifiable_n[name2cp(k)] = unifiable[k] + +def charref(name): + if name[0] in ['x','X']: + c = int(name[1:], 16) + else: + c = int(name) + + if not UNICODE_SNOB and c in unifiable_n.keys(): + return unifiable_n[c] + else: + return unichr(c) + +def entityref(c): + if not UNICODE_SNOB and c in unifiable.keys(): + return unifiable[c] + else: + try: name2cp(c) + except KeyError: return "&" + c + else: return unichr(name2cp(c)) + +def replaceEntities(s): + s = s.group(1) + if s[0] == "#": + return charref(s[1:]) + else: return entityref(s) + +r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") +def unescape(s): + return r_unescape.sub(replaceEntities, s) + +def fixattrs(attrs): + # Fix bug in sgmllib.py + if not attrs: return attrs + newattrs = [] + for attr in attrs: + newattrs.append((attr[0], unescape(attr[1]))) + return newattrs + +### End Entity Nonsense ### + +def onlywhite(line): + """Return true if the line does only consist of whitespace characters.""" + for c in line: + if c is not ' ' and c is not ' ': + return c is ' ' + return line + +def optwrap(text): + """Wrap all paragraphs in the provided text.""" + if not BODY_WIDTH: + return text + + assert wrap, "Requires Python 2.3." 
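+    # Wrap each nonempty line to BODY_WIDTH below; lines that already look
+    # like list items or indented blocks (leading ' ', '-' or '*') pass
+    # through unwrapped.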
+ result = '' + newlines = 0 + for para in text.split("\n"): + if len(para) > 0: + if para[0] is not ' ' and para[0] is not '-' and para[0] is not '*': + for line in wrap(para, BODY_WIDTH): + result += line + "\n" + result += "\n" + newlines = 2 + else: + if not onlywhite(para): + result += para + "\n" + newlines = 1 + else: + if newlines < 2: + result += "\n" + newlines += 1 + return result + +def hn(tag): + if tag[0] == 'h' and len(tag) == 2: + try: + n = int(tag[1]) + if n in range(1, 10): return n + except ValueError: return 0 + +class _html2text(sgmllib.SGMLParser): + def __init__(self, out=None, baseurl=''): + sgmllib.SGMLParser.__init__(self) + + if out is None: self.out = self.outtextf + else: self.out = out + self.outtext = u'' + self.quiet = 0 + self.p_p = 0 + self.outcount = 0 + self.start = 1 + self.space = 0 + self.a = [] + self.astack = [] + self.acount = 0 + self.list = [] + self.blockquote = 0 + self.pre = 0 + self.startpre = 0 + self.lastWasNL = 0 + self.abbr_title = None # current abbreviation definition + self.abbr_data = None # last inner HTML (for abbr being defined) + self.abbr_list = {} # stack of abbreviations to write later + self.baseurl = baseurl + + def outtextf(self, s): + self.outtext += s + + def close(self): + sgmllib.SGMLParser.close(self) + + self.pbr() + self.o('', 0, 'end') + + return self.outtext + + def handle_charref(self, c): + self.o(charref(c)) + + def handle_entityref(self, c): + self.o(entityref(c)) + + def unknown_starttag(self, tag, attrs): + self.handle_tag(tag, attrs, 1) + + def unknown_endtag(self, tag): + self.handle_tag(tag, None, 0) + + def previousIndex(self, attrs): + """ returns the index of certain set of attributes (of a link) in the + self.a list + + If the set of attributes is not found, returns None + """ + if not attrs.has_key('href'): return None + + i = -1 + for a in self.a: + i += 1 + match = 0 + + if a.has_key('href') and a['href'] == attrs['href']: + if a.has_key('title') or attrs.has_key('title'): + if (a.has_key('title') and attrs.has_key('title') and + a['title'] == attrs['title']): + match = True + else: + match = True + + if match: return i + + def handle_tag(self, tag, attrs, start): + attrs = fixattrs(attrs) + + if hn(tag): + self.p() + if start: self.o(hn(tag)*"#" + ' ') + + if tag in ['p', 'div']: self.p() + + if tag == "br" and start: self.o(" \n") + + if tag == "hr" and start: + self.p() + self.o("* * *") + self.p() + + if tag in ["head", "style", 'script']: + if start: self.quiet += 1 + else: self.quiet -= 1 + + if tag in ["body"]: + self.quiet = 0 # sites like 9rules.com never close <head> + + if tag == "blockquote": + if start: + self.p(); self.o('> ', 0, 1); self.start = 1 + self.blockquote += 1 + else: + self.blockquote -= 1 + self.p() + + if tag in ['em', 'i', 'u']: self.o("_") + if tag in ['strong', 'b']: self.o("**") + if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` `` + if tag == "abbr": + if start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + + self.abbr_title = None + self.abbr_data = '' + if attrs.has_key('title'): + self.abbr_title = attrs['title'] + else: + if self.abbr_title != None: + self.abbr_list[self.abbr_data] = self.abbr_title + self.abbr_title = None + self.abbr_data = '' + + if tag == "a": + if start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): + self.astack.append(attrs) + self.o("[") + else: + self.astack.append(None) + else: + if 
self.astack: + a = self.astack.pop() + if a: + i = self.previousIndex(a) + if i is not None: + a = self.a[i] + else: + self.acount += 1 + a['count'] = self.acount + a['outcount'] = self.outcount + self.a.append(a) + self.o("][" + `a['count']` + "]") + + if tag == "img" and start: + attrsD = {} + for (x, y) in attrs: attrsD[x] = y + attrs = attrsD + if attrs.has_key('src'): + attrs['href'] = attrs['src'] + alt = attrs.get('alt', '') + i = self.previousIndex(attrs) + if i is not None: + attrs = self.a[i] + else: + self.acount += 1 + attrs['count'] = self.acount + attrs['outcount'] = self.outcount + self.a.append(attrs) + self.o("![") + self.o(alt) + self.o("]["+`attrs['count']`+"]") + + if tag == 'dl' and start: self.p() + if tag == 'dt' and not start: self.pbr() + if tag == 'dd' and start: self.o(' ') + if tag == 'dd' and not start: self.pbr() + + if tag in ["ol", "ul"]: + if start: + self.list.append({'name':tag, 'num':0}) + else: + if self.list: self.list.pop() + + self.p() + + if tag == 'li': + if start: + self.pbr() + if self.list: li = self.list[-1] + else: li = {'name':'ul', 'num':0} + self.o(" "*len(self.list)) #TODO: line up <ol><li>s > 9 correctly. + if li['name'] == "ul": self.o("* ") + elif li['name'] == "ol": + li['num'] += 1 + self.o(`li['num']`+". ") + self.start = 1 + else: + self.pbr() + + if tag in ["table", "tr"] and start: self.p() + if tag == 'td': self.pbr() + + if tag == "pre": + if start: + self.startpre = 1 + self.pre = 1 + else: + self.pre = 0 + self.p() + + def pbr(self): + if self.p_p == 0: self.p_p = 1 + + def p(self): self.p_p = 2 + + def o(self, data, puredata=0, force=0): + if self.abbr_data is not None: self.abbr_data += data + + if not self.quiet: + if puredata and not self.pre: + data = re.sub('\s+', ' ', data) + if data and data[0] == ' ': + self.space = 1 + data = data[1:] + if not data and not force: return + + if self.startpre: + #self.out(" :") #TODO: not output when already one there + self.startpre = 0 + + bq = (">" * self.blockquote) + if not (force and data and data[0] == ">") and self.blockquote: bq += " " + + if self.pre: + bq += " " + data = data.replace("\n", "\n"+bq) + + if self.start: + self.space = 0 + self.p_p = 0 + self.start = 0 + + if force == 'end': + # It's the end. + self.p_p = 0 + self.out("\n") + self.space = 0 + + + if self.p_p: + self.out(('\n'+bq)*self.p_p) + self.space = 0 + + if self.space: + if not self.lastWasNL: self.out(' ') + self.space = 0 + + if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"): + if force == "end": self.out("\n") + + newa = [] + for link in self.a: + if self.outcount > link['outcount']: + self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) + if link.has_key('title'): self.out(" ("+link['title']+")") + self.out("\n") + else: + newa.append(link) + + if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. 
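+
+            # Links flushed above become reference-style definitions;
+            # links whose surrounding text has not yet been written out
+            # stay queued in self.a for a later flush.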
+ + self.a = newa + + if self.abbr_list and force == "end": + for abbr, definition in self.abbr_list.items(): + self.out(" *[" + abbr + "]: " + definition + "\n") + + self.p_p = 0 + self.out(data) + self.lastWasNL = data and data[-1] == '\n' + self.outcount += 1 + + def handle_data(self, data): + if r'\/script>' in data: self.quiet -= 1 + self.o(data, 1) + + def unknown_decl(self, data): pass + +def wrapwrite(text): sys.stdout.write(text.encode('utf8')) + +def html2text_file(html, out=wrapwrite, baseurl=''): + h = _html2text(out, baseurl) + h.feed(html) + h.feed("") + return h.close() + +def html2text(html, baseurl=''): + return optwrap(html2text_file(html, None, baseurl)) + +if __name__ == "__main__": + baseurl = '' + if sys.argv[1:]: + arg = sys.argv[1] + if arg.startswith('http://'): + baseurl = arg + j = urllib.urlopen(baseurl) + try: + from feedparser import _getCharacterEncoding as enc + except ImportError: + enc = lambda x, y: ('utf-8', 1) + text = j.read() + encoding = enc(j.headers, text)[0] + if encoding == 'us-ascii': encoding = 'utf-8' + data = text.decode(encoding) + + else: + encoding = 'utf8' + if len(sys.argv) > 2: + encoding = sys.argv[2] + data = open(arg, 'r').read().decode(encoding) + else: + data = sys.stdin.read().decode('utf8') + wrapwrite(html2text(data, baseurl)) diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py new file mode 100644 index 00000000..d9e2d848 --- /dev/null +++ b/fanficdownloader/htmlcleanup.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re + +def _unirepl(match): + "Return the unicode string for a decimal number" + if match.group(1)=='x': + radix=16 + else: + radix=10 + value = int(match.group(2), radix ) + return unichr(value) + +def _replaceNumberEntities(data): + p = re.compile(r'&#(x?)([0-9a-fA-F]+);') + return p.sub(_unirepl, data) + +def _replaceNotEntities(data): + # not just \w or \S. 
regexp from c:\Python25\lib\sgmllib.py + # (or equiv), SGMLParser, entityref + p = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);') + return p.sub(r'&\1', data) + +def stripHTML(soup): + return removeAllEntities(re.sub(r'<[^>]+>','',"%s" % soup)).strip() + +def conditionalRemoveEntities(value): + if isinstance(value,str) or isinstance(value,unicode) : + return removeEntities(value).strip() + else: + return value + +def removeAllEntities(text): + # Remove < < and & + return removeEntities(text).replace('<', '<').replace('>', '>').replace('&', '&') + +def removeEntities(text): + + # replace numeric versions of [&<>] with named versions, + # then replace named versions with actual characters, + + if text is None: + return "" + if not (isinstance(text,str) or isinstance(text,unicode)): + return str(text) + + try: + t = text.decode('utf-8') + except UnicodeEncodeError, e: + try: + t = text.encode ('ascii', 'xmlcharrefreplace') + except UnicodeEncodeError, e: + t = text + text = t + text = re.sub(r'�*38;','&',text) + text = re.sub(r'�*60;','<',text) + text = re.sub(r'�*62;','>',text) + + # replace remaining � entities with unicode value, such as ' -> ' + text = _replaceNumberEntities(text) + + # replace several named entities with character, such as — -> - + # see constants.py for the list. + # reverse sort will put entities with ; before the same one without, when valid. + for e in reversed(sorted(entities.keys())): + v = entities[e] + try: + text = text.replace(e, v) + except UnicodeDecodeError, ex: + # for the pound symbol in constants.py + text = text.replace(e, v.decode('utf-8')) + + # SGMLParser, and in turn, BeautifulStoneSoup doesn't parse + # entities terribly well and inserts (;) after something that + # it thinks might be an entity. AT&T becomes AT&T; All of my + # attempts to fix this by changing the input to + # BeautifulStoneSoup break something else instead. But at + # this point, there should be *no* real entities left, so find + # these not-entities and removing them here should be safe. + text = _replaceNotEntities(text) + + # < < and & are the only html entities allowed in xhtml, put those back. 
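+    # (re-escape the bare '&', '<' and '>' characters as entity references
+    # so the returned text is still valid XHTML)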
+ return text.replace('&', '&').replace('&lt', '<').replace('&gt', '>') + +# entity list from http://code.google.com/p/doctype/wiki/CharacterEntitiesConsistent +entities = { 'á' : 'á', + 'Á' : 'Á', + 'Á' : 'Á', + 'á' : 'á', + 'â' : 'â', + 'Â' : 'Â', + 'Â' : 'Â', + 'â' : 'â', + '´' : '´', + '´' : '´', + 'Æ' : 'Æ', + 'æ' : 'æ', + 'Æ' : 'Æ', + 'æ' : 'æ', + 'à' : 'à', + 'À' : 'À', + 'À' : 'À', + 'à' : 'à', + 'ℵ' : 'ℵ', + 'α' : 'α', + 'Α' : 'Α', + '&' : '&', + '&' : '&', + '&' : '&', + '&' : '&', + '∧' : '∧', + '∠' : '∠', + 'å' : 'å', + 'Å' : 'Å', + 'Å' : 'Å', + 'å' : 'å', + '≈' : '≈', + 'ã' : 'ã', + 'Ã' : 'Ã', + 'Ã' : 'Ã', + 'ã' : 'ã', + 'ä' : 'ä', + 'Ä' : 'Ä', + 'Ä' : 'Ä', + 'ä' : 'ä', + '„' : '„', + 'β' : 'β', + 'Β' : 'Β', + '¦' : '¦', + '¦' : '¦', + '•' : '•', + '∩' : '∩', + 'ç' : 'ç', + 'Ç' : 'Ç', + 'Ç' : 'Ç', + 'ç' : 'ç', + '¸' : '¸', + '¸' : '¸', + '¢' : '¢', + '¢' : '¢', + 'χ' : 'χ', + 'Χ' : 'Χ', + 'ˆ' : 'ˆ', + '♣' : '♣', + '≅' : '≅', + '©' : '©', + '©' : '©', + '©' : '©', + '©' : '©', + '↵' : '↵', + '∪' : '∪', + '¤' : '¤', + '¤' : '¤', + '†' : '†', + '‡' : '‡', + '↓' : '↓', + '⇓' : '⇓', + '°' : '°', + '°' : '°', + 'δ' : 'δ', + 'Δ' : 'Δ', + '♦' : '♦', + '÷' : '÷', + '÷' : '÷', + 'é' : 'é', + 'É' : 'É', + 'É' : 'É', + 'é' : 'é', + 'ê' : 'ê', + 'Ê' : 'Ê', + 'Ê' : 'Ê', + 'ê' : 'ê', + 'è' : 'è', + 'È' : 'È', + 'È' : 'È', + 'è' : 'è', + '∅' : '∅', + ' ' : ' ', + ' ' : ' ', + 'ε' : 'ε', + 'Ε' : 'Ε', + '≡' : '≡', + 'η' : 'η', + 'Η' : 'Η', + 'ð' : 'ð', + 'Ð' : 'Ð', + 'Ð' : 'Ð', + 'ð' : 'ð', + 'ë' : 'ë', + 'Ë' : 'Ë', + 'Ë' : 'Ë', + 'ë' : 'ë', + '€' : '€', + '∃' : '∃', + 'ƒ' : 'ƒ', + '∀' : '∀', + '½' : '½', + '½' : '½', + '¼' : '¼', + '¼' : '¼', + '¾' : '¾', + '¾' : '¾', + '⁄' : '⁄', + 'γ' : 'γ', + 'Γ' : 'Γ', + '≥' : '≥', + #'>' : '>', + #'>' : '>', + #'>' : '>', + #'>' : '>', + '↔' : '↔', + '⇔' : '⇔', + '♥' : '♥', + '…' : '…', + 'í' : 'í', + 'Í' : 'Í', + 'Í' : 'Í', + 'í' : 'í', + 'î' : 'î', + 'Î' : 'Î', + 'Î' : 'Î', + 'î' : 'î', + '¡' : '¡', + '¡' : '¡', + 'ì' : 'ì', + 'Ì' : 'Ì', + 'Ì' : 'Ì', + 'ì' : 'ì', + 'ℑ' : 'ℑ', + '∞' : '∞', + '∫' : '∫', + 'ι' : 'ι', + 'Ι' : 'Ι', + '¿' : '¿', + '¿' : '¿', + '∈' : '∈', + 'ï' : 'ï', + 'Ï' : 'Ï', + 'Ï' : 'Ï', + 'ï' : 'ï', + 'κ' : 'κ', + 'Κ' : 'Κ', + 'λ' : 'λ', + 'Λ' : 'Λ', + '«' : '«', + '«' : '«', + '←' : '←', + '⇐' : '⇐', + '⌈' : '⌈', + '“' : '“', + '≤' : '≤', + '⌊' : '⌊', + '∗' : '∗', + '◊' : '◊', + '‎' : '‎', + '‹' : '‹', + '‘' : '‘', + #'<' : '<', + #'<' : '<', + #'<' : '<', + #'<' : '<', + '¯' : '¯', + '¯' : '¯', + '—' : '—', + 'µ' : 'µ', + 'µ' : 'µ', + '·' : '·', + '·' : '·', + '−' : '−', + 'μ' : 'μ', + 'Μ' : 'Μ', + '∇' : '∇', + ' ' : ' ', + ' ' : ' ', + '–' : '–', + '≠' : '≠', + '∋' : '∋', + '¬' : '¬', + '¬' : '¬', + '∉' : '∉', + '⊄' : '⊄', + 'ñ' : 'ñ', + 'Ñ' : 'Ñ', + 'Ñ' : 'Ñ', + 'ñ' : 'ñ', + 'ν' : 'ν', + 'Ν' : 'Ν', + 'ó' : 'ó', + 'Ó' : 'Ó', + 'Ó' : 'Ó', + 'ó' : 'ó', + 'ô' : 'ô', + 'Ô' : 'Ô', + 'Ô' : 'Ô', + 'ô' : 'ô', + 'Œ' : 'Œ', + 'œ' : 'œ', + 'ò' : 'ò', + 'Ò' : 'Ò', + 'Ò' : 'Ò', + 'ò' : 'ò', + '‾' : '‾', + 'ω' : 'ω', + 'Ω' : 'Ω', + 'ο' : 'ο', + 'Ο' : 'Ο', + '⊕' : '⊕', + '∨' : '∨', + 'ª' : 'ª', + 'ª' : 'ª', + 'º' : 'º', + 'º' : 'º', + 'ø' : 'ø', + 'Ø' : 'Ø', + 'Ø' : 'Ø', + 'ø' : 'ø', + 'õ' : 'õ', + 'Õ' : 'Õ', + 'Õ' : 'Õ', + 'õ' : 'õ', + '⊗' : '⊗', + 'ö' : 'ö', + 'Ö' : 'Ö', + 'Ö' : 'Ö', + 'ö' : 'ö', + '¶' : '¶', + '¶' : '¶', + '∂' : '∂', + '‰' : '‰', + '⊥' : '⊥', + 'φ' : 'φ', + 'Φ' : 'Φ', + 'π' : 'π', + 'Π' : 'Π', + 'ϖ' : 'ϖ', + '±' : '±', + '±' : '±', + '£' : '£', + '£' : '£', + '′' : '′', + '″' : '″', + '∏' : '∏', + '∝' : '∝', + 'ψ' : 'ψ', + 'Ψ' : 
'Ψ', + '"' : '"', + '"' : '"', + '"' : '"', + '"' : '"', + '√' : '√', + '»' : '»', + '»' : '»', + '→' : '→', + '⇒' : '⇒', + '⌉' : '⌉', + '”' : '”', + 'ℜ' : 'ℜ', + '®' : '®', + '®' : '®', + '®' : '®', + '®' : '®', + '⌋' : '⌋', + 'ρ' : 'ρ', + 'Ρ' : 'Ρ', + '‏' : '‏', + '›' : '›', + '’' : '’', + '‚' : '‚', + 'š' : 'š', + 'Š' : 'Š', + '⋅' : '⋅', + '§' : '§', + '§' : '§', + '­' : '­', # strange optional hyphenation control character, not just a dash + '­' : '­', + 'σ' : 'σ', + 'Σ' : 'Σ', + 'ς' : 'ς', + '∼' : '∼', + '♠' : '♠', + '⊂' : '⊂', + '⊆' : '⊆', + '∑' : '∑', + '¹' : '¹', + '¹' : '¹', + '²' : '²', + '²' : '²', + '³' : '³', + '³' : '³', + '⊃' : '⊃', + '⊇' : '⊇', + 'ß' : 'ß', + 'ß' : 'ß', + 'τ' : 'τ', + 'Τ' : 'Τ', + '∴' : '∴', + 'θ' : 'θ', + 'Θ' : 'Θ', + 'ϑ' : 'ϑ', + ' ' : ' ', + 'þ' : 'þ', + 'Þ' : 'Þ', + 'Þ' : 'Þ', + 'þ' : 'þ', + '˜' : '˜', + '×' : '×', + '×' : '×', + '™' : '™', + 'ú' : 'ú', + 'Ú' : 'Ú', + 'Ú' : 'Ú', + 'ú' : 'ú', + '↑' : '↑', + '⇑' : '⇑', + 'û' : 'û', + 'Û' : 'Û', + 'Û' : 'Û', + 'û' : 'û', + 'ù' : 'ù', + 'Ù' : 'Ù', + 'Ù' : 'Ù', + 'ù' : 'ù', + '¨' : '¨', + '¨' : '¨', + 'ϒ' : 'ϒ', + 'υ' : 'υ', + 'Υ' : 'Υ', + 'ü' : 'ü', + 'Ü' : 'Ü', + 'Ü' : 'Ü', + 'ü' : 'ü', + '℘' : '℘', + 'ξ' : 'ξ', + 'Ξ' : 'Ξ', + 'ý' : 'ý', + 'Ý' : 'Ý', + 'Ý' : 'Ý', + 'ý' : 'ý', + '¥' : '¥', + '¥' : '¥', + 'ÿ' : 'ÿ', + 'Ÿ' : 'Ÿ', + 'ÿ' : 'ÿ', + 'ζ' : 'ζ', + 'Ζ' : 'Ζ', + '‍' : '‍', # strange spacing control character, not just a space + '‌' : '‌', # strange spacing control character, not just a space + } diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py new file mode 100644 index 00000000..4748e202 --- /dev/null +++ b/fanficdownloader/mobi.py @@ -0,0 +1,384 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + + +import StringIO +import struct +import time +import random +import logging + +from html import HtmlProcessor + +# http://wiki.mobileread.com/wiki/MOBI +# http://membres.lycos.fr/microfirst/palm/pdb.html + +encoding = { + 'UTF-8' : 65001, + 'latin-1' : 1252, +} + +languages = {"en-us" : 0x0409, + "sv" : 0x041d, + "fi" : 0x000b, + "en" : 0x0009, + "en-gb" : 0x0809} + +def ToHex(s): + v = ['%.2x' % ord(c) for c in s] + return ' '.join(v) + +class _SubEntry: + def __init__(self, pos, html_data): + self.pos = pos + self.html = HtmlProcessor(html_data) + self.title = self.html.title + self._name = 'mobi_article_%d' % pos + if not self.title: + self.title = 'Article %d' % self.pos + + def TocLink(self): + return '<a href="#%s_MOBI_START">%.80s</a>' % (self._name, self.title) + + def Anchor(self): + return '<a name="%s_MOBI_START">' % self._name + + def Body(self): + return self.html.RenameAnchors(self._name + '_') + +class Converter: + def __init__(self, refresh_url='', title='Unknown', author='Unknown', publisher='Unknown'): + self._header = Header() + self._header.SetTitle(title) + self._header.SetAuthor(author) + self._header.SetPublisher(publisher) + self._refresh_url = refresh_url + + def ConvertString(self, s): + out = StringIO.StringIO() + self._ConvertStringToFile(s, out) + return out.getvalue() + + def ConvertStrings(self, html_strs): + out = StringIO.StringIO() + self._ConvertStringsToFile(html_strs, out) + return out.getvalue() + + def ConvertFile(self, html_file, out_file): + self._ConvertStringToFile(open(html_file,'rb').read(), + open(out_file, 'wb')) + + def ConvertFiles(self, html_files, out_file): + html_strs = [open(f,'rb').read() for f in html_files] + self._ConvertStringsToFile(html_strs, open(out_file, 'wb')) + + def MakeOneHTML(self, 
html_strs): + """This takes a list of HTML strings and returns a big HTML file with + all contents consolidated. It constructs a table of contents and adds + anchors within the text + """ + title_html = [] + toc_html = [] + body_html = [] + + PAGE_BREAK = '<mbp:pagebreak>' + + # pull out the title page, assumed first html_strs. + htmltitle = html_strs[0] + entrytitle = _SubEntry(1, htmltitle) + title_html.append(entrytitle.Body()) + + title_html.append(PAGE_BREAK) + toc_html.append('<a name="TOCTOP"><h3>Table of Contents</h3><br />') + + for pos, html in enumerate(html_strs[1:]): + entry = _SubEntry(pos+1, html) + toc_html.append('%s<br />' % entry.TocLink()) + + # give some space between bodies of work. + body_html.append(PAGE_BREAK) + + body_html.append(entry.Anchor()) + + body_html.append(entry.Body()) + + # TODO: this title can get way too long with RSS feeds. Not sure how to fix + # cheat slightly and use the <a href> code to set filepos in references. + header = '''<html> +<head> +<title>Bibliorize %s GMT + + + + + +''' % time.ctime(time.time()) + + footer = '' + all_html = header + '\n'.join(title_html + toc_html + body_html) + footer + #print "%s" % all_html.encode('utf8') + return all_html + + def _ConvertStringsToFile(self, html_strs, out_file): + try: + tmp = self.MakeOneHTML(html_strs) + self._ConvertStringToFile(tmp, out_file) + except Exception, e: + logging.error('Error %s', e) + logging.debug('Details: %s' % html_strs) + + def _ConvertStringToFile(self, html_data, out): + html = HtmlProcessor(html_data) + data = html.CleanHtml() + + # collect offsets of '' tags, use to make index list. + # indexlist = [] # list of (offset,length) tuples. + # not in current use. + + # j=0 + # lastj=0 + # while True: + # j=data.find('',lastj+10) # plus a bit so we find the next. + # if j < 0: + # break + # indexlist.append((lastj,j-lastj)) + # print "index offset: %d length: %d" % (lastj,j-lastj) + # lastj=j + + records = [] +# title = html.title +# if title: +# self._header.SetTitle(title) + record_id = 1 + for start_pos in range(0, len(data), Record.MAX_SIZE): + end = min(len(data), start_pos + Record.MAX_SIZE) + record_data = data[start_pos:end] + records.append(self._header.AddRecord(record_data, record_id)) + #print "HTML Record %03d: (size:%d) [[%s ... %s]]" % ( record_id, len(record_data), record_data[:20], record_data[-20:] ) + record_id += 1 + self._header.SetImageRecordIndex(record_id) + records[0:0] = [self._header.MobiHeader()] + + header, rec_offset = self._header.PDBHeader(len(records)) + out.write(header) + for record in records: + record.WriteHeader(out, rec_offset) + #print "rec_offset: %d len(record.data): %d" % (rec_offset,len(record.data)) + rec_offset += (len(record.data)+1) # plus one for trailing null + + # Write to nuls for some reason + out.write('\0\0') + for record in records: + record.WriteData(out) + out.write('\0') + # needs a trailing null, I believe it indicates zero length 'overlap'. + # otherwise, the readers eat the last char of each html record. + # Calibre writes another 6-7 bytes of stuff after that, but we seem + # to be getting along without it. + +class Record: + MAX_SIZE = 4096 + INDEX_LEN = 8 + _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader + + # TODO(chatham): Record compression doesn't look that hard. 
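+    # PDB record-index layout, as written by WriteHeader below: a 4-byte
+    # offset to the record data, one attribute byte, then a 3-byte unique
+    # id packed as a zero byte plus a big-endian unsigned short -- 8 bytes
+    # total, matching INDEX_LEN.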
+ + def __init__(self, data, record_id): + assert len(data) <= self.MAX_SIZE + self.data = data + if record_id != 0: + self._id = record_id + else: + Record._unique_id_seed += 1 + self._id = 0 + + def __repr__(self): + return 'Record: id=%d len=%d' % (self._id, len(self.data)) + + def _SetUniqueId(self): + Record._unique_id_seed += 1 + # TODO(chatham): Wraparound crap + self._id = Record._unique_id_seed + + def WriteData(self, out): + out.write(self.data) + + def WriteHeader(self, out, rec_offset): + attributes = 64 # dirty? + header = struct.pack('>IbbH', + rec_offset, + attributes, + 0, self._id) + assert len(header) == Record.INDEX_LEN + out.write(header) + +EXTH_HEADER_FIELDS = { + 'author' : 100, + 'publisher' : 101, +} + +class Header: + EPOCH_1904 = 2082844800 + + def __init__(self): + self._length = 0 + self._record_count = 0 + self._title = '2008_2_34' + self._author = 'Unknown author' + self._publisher = 'Unknown publisher' + self._first_image_index = 0 + + def SetAuthor(self, author): + self._author = author.encode('ascii','ignore') + + def SetTitle(self, title): + # TODO(chatham): Reevaluate whether this needs to be ASCII. + # maybe just do sys.setdefaultencoding('utf-8')? Problems + # appending self._title with other things. + self._title = title.encode('ascii','ignore') + + def SetPublisher(self, publisher): + self._publisher = publisher.encode('ascii','ignore') + + def AddRecord(self, data, record_id): + self.max_record_size = max(Record.MAX_SIZE, len(data)) + self._record_count += 1 + self._length += len(data) + return Record(data, record_id) + + def _ReplaceWord(self, data, pos, word): + return data[:pos] + struct.pack('>I', word) + data[pos+4:] + + def PalmDocHeader(self): + compression = 1 # no compression + unused = 0 + encryption_type = 0 # no ecryption + records = self._record_count + 1 # the header record itself + palmdoc_header = struct.pack('>HHIHHHH', + compression, + unused, + self._length, + records, + Record.MAX_SIZE, + encryption_type, + unused) + assert len(palmdoc_header) == 16 + return palmdoc_header + + def PDBHeader(self, num_records): + HEADER_LEN = 32+2+2+9*4 + RECORD_INDEX_HEADER_LEN = 6 + RESOURCE_INDEX_LEN = 10 + + index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN + rec_offset = HEADER_LEN + index_len + 2 + + short_title = self._title[0:31] + attributes = 0 + version = 0 + ctime = self.EPOCH_1904 + int(time.time()) + mtime = self.EPOCH_1904 + int(time.time()) + backup_time = self.EPOCH_1904 + int(time.time()) + modnum = 0 + appinfo_offset = 0 + sort_offset = 0 + type = 'BOOK' + creator = 'MOBI' + id_seed = 36 + header = struct.pack('>32sHHII', + short_title, attributes, version, + ctime, mtime) + header += struct.pack('>IIII', backup_time, modnum, + appinfo_offset, sort_offset) + header += struct.pack('>4s4sI', + type, creator, id_seed) + next_record = 0 # not used? + header += struct.pack('>IH', next_record, num_records) + return header, rec_offset + + def _GetExthHeader(self): + # They set author, publisher, coveroffset, thumboffset + data = {'author' : self._author, + 'publisher' : self._publisher, + } + # Turn string type names into EXTH typeids. 
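+        # Each EXTH record packed below is: 4-byte type id, 4-byte
+        # record length (which counts these 8 header bytes), then the
+        # raw value; the concatenated records are padded out to a
+        # 4-byte boundary before the 'EXTH' block header is prepended.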
+ r = [] + for key, value in data.items(): + typeid = EXTH_HEADER_FIELDS[key] + length_encoding_len = 8 + r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value) + content = ''.join(r) + + # Pad to word boundary + while len(content) % 4: + content += '\0' + TODO_mysterious = 12 + exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content + return exth + + def SetImageRecordIndex(self, idx): + self._first_image_index = idx + + def MobiHeader(self): + exth_header = self._GetExthHeader(); + palmdoc_header = self.PalmDocHeader() + + fs = 0xffffffff + + # Record 0 + header_len = 0xE4 # TODO + mobi_type = 2 # BOOK + text_encoding = encoding['UTF-8'] + unique_id = random.randint(1, 1<<32) + creator_version = 4 + reserved = '%c' % 0xff * 40 + nonbook_index = fs + full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header + language = languages['en-us'] + unused = 0 + mobi_header = struct.pack('>4sIIIII40sIIIIII', + 'MOBI', + header_len, + mobi_type, + text_encoding, + unique_id, + creator_version, + reserved, + nonbook_index, + full_name_offset, + len(self._title), + language, + fs, fs) + assert len(mobi_header) == 104 - 16 + + unknown_fields = chr(0) * 32 + drm_offset = 0 + drm_count = 0 + drm_size = 0 + drm_flags = 0 + exth_flags = 0x50 + header_end = chr(0) * 64 + mobi_header += struct.pack('>IIIIIII', + creator_version, + self._first_image_index, + fs, + unused, + fs, + unused, + exth_flags) + mobi_header += '\0' * 112 # TODO: Why this much padding? + # Set some magic offsets to be 0xFFFFFFF. + for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc): + mobi_header = self._ReplaceWord(mobi_header, pos, fs) + + # 16 bytes? + padding = '\0' * 48 * 4 # why? + total_header = palmdoc_header + mobi_header + exth_header + self._title + padding + + return self.AddRecord(total_header, 0) + +if __name__ == '__main__': + import sys + m = Converter(title='Testing Mobi', author='Mobi Author', publisher='mobi converter') + m.ConvertFiles(sys.argv[1:], 'test.mobi') + #m.ConvertFile(sys.argv[1], 'test.mobi') diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py new file mode 100644 index 00000000..ba3def4a --- /dev/null +++ b/fanficdownloader/story.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os, re + +from htmlcleanup import conditionalRemoveEntities, removeAllEntities + +# The list comes from ffnet, the only multi-language site we support +# at the time of writing. Values are taken largely from pycountry, +# but with some corrections and guesses. 
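+# (Values are mostly two-letter ISO 639-1 codes; "fil" is the
+# three-letter ISO 639-2 code, since Filipino has no two-letter code.)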
+langs = { + "English":"en", + "Spanish":"es", + "French":"fr", + "German":"de", + "Chinese":"zh", + "Japanese":"ja", + "Dutch":"nl", + "Portuguese":"pt", + "Russian":"ru", + "Italian":"it", + "Bulgarian":"bg", + "Polish":"pl", + "Hungarian":"hu", + "Hebrew":"he", + "Arabic":"ar", + "Swedish":"sv", + "Norwegian":"no", + "Danish":"da", + "Finnish":"fi", + "Filipino":"fil", + "Esperanto":"eo", + "Hindi":"hi", + "Punjabi":"pa", + "Farsi":"fa", + "Greek":"el", + "Romanian":"ro", + "Albanian":"sq", + "Serbian":"sr", + "Turkish":"tr", + "Czech":"cs", + "Indonesian":"id", + "Croatian":"hr", + "Catalan":"ca", + "Latin":"la", + "Korean":"ko", + "Vietnamese":"vi", + "Thai":"th", + "Devanagari":"hi", + } + +class Story: + + def __init__(self): + try: + self.metadata = {'version':os.environ['CURRENT_VERSION_ID']} + except: + self.metadata = {'version':'4.3'} + self.replacements = [] + self.chapters = [] # chapters will be tuples of (title,html) + self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists. + + def setMetadata(self, key, value): + ## still keeps < < and & + self.metadata[key]=conditionalRemoveEntities(value) + if key == "language": + try: + self.metadata['langcode'] = langs[self.metadata[key]] + except: + self.metadata['langcode'] = 'en' + + def getMetadataRaw(self,key): + if self.metadata.has_key(key): + return self.metadata[key] + + def doReplacments(self,value): + for (p,v) in self.replacements: + if (isinstance(value,str) or isinstance(value,unicode)) and re.match(p,value): + value = re.sub(p,v,value) + return value; + + def getMetadata(self, key, removeallentities=False): + value = None + if self.getLists().has_key(key): + value = ', '.join(self.getList(key)) + if self.metadata.has_key(key): + value = self.metadata[key] + if value: + if key == "numWords": + value = commaGroups(value) + if key == "dateCreated": + value = value.strftime("%Y-%m-%d %H:%M:%S") + if key == "datePublished" or key == "dateUpdated": + value = value.strftime("%Y-%m-%d") + + value=self.doReplacments(value) + if removeallentities and value != None: + return removeAllEntities(value) + else: + return value + + def getAllMetadata(self, removeallentities=False): + ''' + All single value *and* list value metadata as strings. + ''' + allmetadata = {} + for k in self.metadata.keys(): + allmetadata[k] = self.getMetadata(k, removeallentities) + for l in self.listables.keys(): + allmetadata[l] = self.getMetadata(l, removeallentities) + + return allmetadata + + def addToList(self,listname,value): + if value==None: + return + value = conditionalRemoveEntities(value) + if not self.listables.has_key(listname): + self.listables[listname]=[] + # prevent duplicates. 
+ if not value in self.listables[listname]: + self.listables[listname].append(value) + + def getList(self,listname): + if not self.listables.has_key(listname): + return [] + return filter( lambda x : x!=None and x!='' , + map(self.doReplacments,self.listables[listname]) ) + + def getLists(self): + lsts = {} + for ln in self.listables.keys(): + lsts[ln] = self.getList(ln) + return lsts + + def addChapter(self, title, html): + self.chapters.append( (title,html) ) + + def getChapters(self): + "Chapters will be tuples of (title,html)" + return self.chapters + + def __str__(self): + return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters) + + def setReplace(self,replace): + for line in replace.splitlines(): + if "=>" in line: + self.replacements.append(map( lambda x: x.strip(), line.split("=>") )) + +def commaGroups(s): + groups = [] + while s and s[-1].isdigit(): + groups.append(s[-3:]) + s = s[:-3] + return s + ','.join(reversed(groups)) + diff --git a/fanficdownloader/translit.py b/fanficdownloader/translit.py new file mode 100644 index 00000000..bf205a6d --- /dev/null +++ b/fanficdownloader/translit.py @@ -0,0 +1,57 @@ +#-*-coding:utf-8-*- +# Code taken from http://python.su/forum/viewtopic.php?pid=66946 +import unicodedata +def is_syllable(letter): + syllables = ("A", "E", "I", "O", "U", "a", "e", "i", "o", "u") + if letter in syllables: + return True + return False +def is_consonant(letter): + return not is_syllable(letter) +def romanize(letter): + try: + str(letter) + except UnicodeEncodeError: + pass + else: + return str(letter) + unid = unicodedata.name(letter) + exceptions = {"NUMERO SIGN": "No", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK": "\"", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK": "\"", "DASH": "-"} + for name_contains in exceptions: + if unid.find(name_contains)!=-1: + return exceptions[name_contains] + assert(unid.startswith("CYRILLIC"))# Not ready to romanize anything but cyrillics + transformation_pairs = {"CYRILLIC CAPITAL LETTER ": str.capitalize, "CYRILLIC SMALL LETTER ": str.lower} + func = str.lower + for name_contains in transformation_pairs: + if unid.find(name_contains)!=-1: + func = transformation_pairs[name_contains] + unid = unid.replace(name_contains, "") + cyrillic_exceptions = {"YERU": "y", "SHORT I": "y", "HARD SIGN": "\'", "SOFT SIGN": "\'", "BYELORUSSIAN-UKRAINIAN I": "i", "GHE WITH UPTURN": "g", "UKRAINIAN IE": "ie", "YU": "yu", "YA": "ya"} + for name_contains in cyrillic_exceptions: + if unid.find(name_contains)!=-1: + return cyrillic_exceptions[name_contains] + if all(map(is_syllable, unid)): + return func(unid) + else: + return func(filter(is_consonant, unid)) +def translit(text): + output = "" + for letter in text: + output += romanize(letter) + return output +#def main(): + #text = u"русск.: Любя, съешь щипцы, — вздохнёт мэр, — кайф жгуч." + #print translit(text) + #text = u"укр.: Гей, хлопці, не вспію - на ґанку ваша файна їжа знищується бурундучком." + #print translit(text) + #text = u"болг.: Ах, чудна българска земьо, полюшквай цъфтящи жита." + #print translit(text) + #text = u"серб.: Неуредне ноћне даме досађивале су Џеку К." + #print translit(text) + #russk.: Lyubya, s'iesh' shchiptsy, - vzdohniot mer, - kayf zhghuch. + #ukr.: Ghiey, hloptsi, nie vspiyu - na ganku vasha fayna yzha znishchuiet'sya burunduchkom. + #bolgh.: Ah, chudna b'lgharska ziem'o, polyushkvay ts'ftyashchi zhita. + #sierb.: Nieuriednie notshnie damie dosadjivalie su Dzhieku K. 
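+# NOTE: the demo main() above was left commented out, so the
+# __main__ guard below would raise a NameError.  A minimal stand-in
+# (an assumption, not part of the original module) so the file can
+# still be run directly:
+def main():
+    # Romanize a short Cyrillic sample; translit() returns a plain str.
+    print translit(u"\u041f\u0440\u0438\u0432\u0435\u0442")  # u"Привет" -> roughly "Priviet"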
+if __name__=="__main__": + main() \ No newline at end of file diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py new file mode 100644 index 00000000..7d9faf64 --- /dev/null +++ b/fanficdownloader/writers/__init__.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## This could (should?) use a dynamic loader like adapters, but for +## now, it's static, since there's so few of them. + +from ..exceptions import FailedToDownload + +from writer_html import HTMLWriter +from writer_txt import TextWriter +from writer_epub import EpubWriter +from writer_mobi import MobiWriter + +def getWriter(type,config,story): + if type == "html": + return HTMLWriter(config,story) + if type == "txt": + return TextWriter(config,story) + if type == "epub": + return EpubWriter(config,story) + if type == "mobi": + return MobiWriter(config,story) + + raise FailedToDownload("(%s) is not a supported download format."%type) diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py new file mode 100644 index 00000000..84a6f5c5 --- /dev/null +++ b/fanficdownloader/writers/base_writer.py @@ -0,0 +1,289 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +import os.path +import datetime +import string +import StringIO +import zipfile +from zipfile import ZipFile, ZIP_DEFLATED +import logging + +from ..configurable import Configurable +from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML + +class BaseStoryWriter(Configurable): + + @staticmethod + def getFormatName(): + return 'base' + + @staticmethod + def getFormatExt(): + return '.bse' + + def __init__(self, config, adapter): + Configurable.__init__(self, config) + self.addConfigSection(adapter.getSiteDomain()) + self.addConfigSection(self.getFormatName()) + self.addConfigSection(adapter.getSiteDomain()+":"+self.getFormatName()) + self.addConfigSection("overrides") + + self.adapter = adapter + self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially. 
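+        # replace_metadata is a multi-line config value; each line is a
+        # "regexp=>replacement" pair that Story.setReplace() parses and
+        # doReplacments() later applies to metadata values (story.py).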
+ + self.story.setReplace(self.getConfig('replace_metadata')) + + self.validEntries = [ + 'category', + 'genre', + 'language', + 'characters', + 'series', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'warnings', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'authorId', + 'extratags', + 'title', + 'storyUrl', + 'description', + 'author', + 'authorUrl', + 'formatname', + 'formatext', + 'siteabbrev', + 'version'] + + # fall back labels. + self.titleLabels = { + 'category':'Category', + 'genre':'Genre', + 'language':'Language', + 'status':'Status', + 'series':'Series', + 'characters':'Characters', + 'datePublished':'Published', + 'dateUpdated':'Updated', + 'dateCreated':'Packaged', + 'rating':'Rating', + 'warnings':'Warnings', + 'numChapters':'Chapters', + 'numWords':'Words', + 'site':'Site', + 'storyId':'Story ID', + 'authorId':'Author ID', + 'extratags':'Extra Tags', + 'title':'Title', + 'storyUrl':'Story URL', + 'description':'Summary', + 'author':'Author', + 'authorUrl':'Author URL', + 'formatname':'File Format', + 'formatext':'File Extension', + 'siteabbrev':'Site Abbrev', + 'version':'FFDL Version' + } + self.story.setMetadata('formatname',self.getFormatName()) + self.story.setMetadata('formatext',self.getFormatExt()) + + for tag in self.getConfigList("extratags"): + self.story.addToList("extratags",tag) + + def getMetadata(self,key): + return stripHTML(self.story.getMetadata(key)) + + def getOutputFileName(self): + if self.getConfig('zip_output'): + return self.getZipFileName() + else: + return self.getBaseFileName() + + def getBaseFileName(self): + return self.formatFileName(self.getConfig('output_filename')) + + def getZipFileName(self): + return self.formatFileName(self.getConfig('zip_filename')) + + def formatFileName(self,template): + values = origvalues = self.story.getAllMetadata() + # fall back default: + if not template: + template="${title}-${siteabbrev}_${storyId}${formatext}" + + if not self.getConfig('allow_unsafe_filename'): + values={} + pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+") + for k in origvalues.keys(): + values[k]=re.sub(pattern,'_', removeAllEntities(self.story.getMetadata(k))) + + return string.Template(template).substitute(values).encode('utf8') + + def _write(self, out, text): + out.write(text.encode('utf8')) + + def writeTitlePage(self, out, START, ENTRY, END, WIDE_ENTRY=None): + """ + Write the title page, but only include entries that there's + metadata for. START, ENTRY and END are expected to already by + string.Template(). START and END are expected to use the same + names as Story.metadata, but ENTRY should use label and value. + """ + if self.getConfig("include_titlepage"): + self._write(out,START.substitute(self.story.metadata)) + + if WIDE_ENTRY==None: + WIDE_ENTRY=ENTRY + + titleEntriesList = self.getConfigList("titlepage_entries") + wideTitleEntriesList = self.getConfigList("wide_titlepage_entries") + + for entry in titleEntriesList: + if entry in self.validEntries: + if self.story.getMetadata(entry): + if entry in wideTitleEntriesList: + TEMPLATE=WIDE_ENTRY + else: + TEMPLATE=ENTRY + if self.getConfigList(entry): + label=self.getConfig(entry+"_label") + else: + label=self.titleLabels[entry] + self._write(out,TEMPLATE.substitute({'label':label, + 'value':self.story.getMetadata(entry)})) + + self._write(out,END.substitute(self.story.metadata)) + + def writeTOCPage(self, out, START, ENTRY, END): + """ + Write the Table of Contents page. 
START, ENTRY and END are expected to already by + string.Template(). START and END are expected to use the same + names as Story.metadata, but ENTRY should use index and chapter. + """ + # Only do TOC if there's more than one chapter and it's configured. + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + self._write(out,START.substitute(self.story.metadata)) + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + + self._write(out,END.substitute(self.story.metadata)) + + # if no outstream is given, write to file. + def writeStory(self,outstream=None, metaonly=False, outfilename=None, forceOverwrite=False): + + self.metaonly = metaonly + if outfilename == None: + outfilename=self.getOutputFileName() + + # minor cheat, tucking css into metadata. + if self.getConfig("output_css"): + self.story.metadata["output_css"] = self.getConfig("output_css") + else: + self.story.metadata["output_css"] = '' + + if not outstream: + close=True + logging.debug("Save directly to file: %s" % outfilename) + if self.getConfig('make_directories'): + path="" + dirs = os.path.dirname(outfilename).split('/') + for dir in dirs: + path+=dir+"/" + if not os.path.exists(path): + os.mkdir(path) ## os.makedirs() doesn't work in 2.5.2? + + ## Check for output file date vs updated date here + if not (self.getConfig('always_overwrite') or forceOverwrite): + if os.path.exists(outfilename): + ## date() truncs off time, which files have, but sites don't report. + lastupdated=self.story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.datetime.fromtimestamp(os.stat(outfilename)[8]).date() + if fileupdated > lastupdated: + print "File(%s) Updated(%s) more recently than Story(%s) - Skipping" % (outfilename,fileupdated,lastupdated) + return + if not metaonly: + self.story = self.adapter.getStory() # get full story + # now, just + # before writing. + # Fetch before + # opening file. + outstream = open(outfilename,"wb") + else: + close=False + logging.debug("Save to stream") + + if not metaonly: + self.story = self.adapter.getStory() # get full story now, + # just before + # writing. Okay if + # double called with + # above, it will only + # fetch once. + if self.getConfig('zip_output'): + out = StringIO.StringIO() + self.writeStoryImpl(out) + zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) + zipout.writestr(self.getBaseFileName(),out.getvalue()) + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in zipout.filelist: + zf.create_system = 0 + zipout.close() + out.close() + else: + self.writeStoryImpl(outstream) + + if close: + outstream.close() + + def getTags(self): + # set to avoid duplicates subject tags. + subjectset = set() + + if self.story.getMetadataRaw('dateUpdated'): + # Last Update tags for Bill. + self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update Year/Month: %Y/%m")) + self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update: %Y/%m/%d")) + + for entry in self.validEntries: + if entry in self.getConfigList("include_subject_tags") and \ + entry not in self.story.getLists() and \ + self.story.getMetadata(entry): + subjectset.add(self.getMetadata(entry)) + + # listables all go into dc:subject tags, but only if they are configured. 
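+        # (dc:subject entries are what most reading apps and calibre
+        # surface as the book's tags/keywords.)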
+ for (name,lst) in self.story.getLists().iteritems(): + if name in self.getConfigList("include_subject_tags"): + for tag in lst: + subjectset.add(tag) + + return list(subjectset) + + def writeStoryImpl(self, out): + "Must be overriden by sub classes." + pass + diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py new file mode 100644 index 00000000..e423556d --- /dev/null +++ b/fanficdownloader/writers/writer_epub.py @@ -0,0 +1,405 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO +import zipfile +from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED + +## XML isn't as forgiving as HTML, so rather than generate as strings, +## use DOM to generate the XML files. +from xml.dom.minidom import parse, parseString, getDOMImplementation + +from base_writer import * +from ..htmlcleanup import stripHTML + +class EpubWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'epub' + + @staticmethod + def getFormatExt(): + return '.epub' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.EPUB_CSS = string.Template('''${output_css}''') + + self.EPUB_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

      ${title} by ${author}

      +
      +''') + + self.EPUB_TITLE_ENTRY = string.Template(''' +${label}: ${value}
      +''') + + self.EPUB_TITLE_PAGE_END = string.Template(''' +
      + + + +''') + + self.EPUB_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

      ${title} by ${author}

      + +''') + + self.EPUB_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.EPUB_TABLE_TITLE_PAGE_END = string.Template(''' +
      ${label}:${value}
      ${label}: ${value}
      + + + +''') + + self.EPUB_TOC_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +
      +

      Table of Contents

      +''') + + self.EPUB_TOC_ENTRY = string.Template(''' +${chapter}
      +''') + + self.EPUB_TOC_PAGE_END = string.Template(''' +
      + + +''') + + self.EPUB_CHAPTER_START = string.Template(''' + + + +${chapter} + + + +

      ${chapter}

      +''') + + self.EPUB_CHAPTER_END = string.Template(''' + + +''') + + def writeStoryImpl(self, out): + + ## Python 2.5 ZipFile is rather more primative than later + ## versions. It can operate on a file, or on a StringIO, but + ## not on an open stream. OTOH, I suspect we would have had + ## problems with closing and opening again to change the + ## compression type anyway. + zipio = StringIO.StringIO() + + ## mimetype must be first file and uncompressed. Python 2.5 + ## ZipFile can't change compression type file-by-file, so we + ## have to close and re-open + outputepub = ZipFile(zipio, 'w', compression=ZIP_STORED) + outputepub.debug=3 + outputepub.writestr('mimetype','application/epub+zip') + outputepub.close() + + ## Re-open file for content. + outputepub = ZipFile(zipio, 'a', compression=ZIP_DEFLATED) + outputepub.debug=3 + + ## Create META-INF/container.xml file. The only thing it does is + ## point to content.opf + containerdom = getDOMImplementation().createDocument(None, "container", None) + containertop = containerdom.documentElement + containertop.setAttribute("version","1.0") + containertop.setAttribute("xmlns","urn:oasis:names:tc:opendocument:xmlns:container") + rootfiles = containerdom.createElement("rootfiles") + containertop.appendChild(rootfiles) + rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf", + "media-type":"application/oebps-package+xml"})) + outputepub.writestr("META-INF/container.xml",containerdom.toxml(encoding='utf-8')) + containerdom.unlink() + del containerdom + + ## Epub has two metadata files with real data. We're putting + ## them in content.opf (pointed to by META-INF/container.xml) + ## and toc.ncx (pointed to by content.opf) + + ## content.opf contains metadata, a 'manifest' list of all + ## other included files, and another 'spine' list of the items in the + ## file + + uniqueid= 'fanficdownloader-uid:%s-u%s-s%s' % ( + self.getMetadata('site'), + self.getMetadata('authorId'), + self.getMetadata('storyId')) + + contentdom = getDOMImplementation().createDocument(None, "package", None) + package = contentdom.documentElement + package.setAttribute("version","2.0") + package.setAttribute("xmlns","http://www.idpf.org/2007/opf") + package.setAttribute("unique-identifier","fanficdownloader-uid") + metadata=newTag(contentdom,"metadata", + attrs={"xmlns:dc":"http://purl.org/dc/elements/1.1/", + "xmlns:opf":"http://www.idpf.org/2007/opf"}) + package.appendChild(metadata) + + metadata.appendChild(newTag(contentdom,"dc:identifier", + text=uniqueid, + attrs={"id":"fanficdownloader-uid"})) + + if self.getMetadata('title'): + metadata.appendChild(newTag(contentdom,"dc:title",text=self.getMetadata('title'))) + + if self.getMetadata('author'): + metadata.appendChild(newTag(contentdom,"dc:creator", + attrs={"opf:role":"aut"}, + text=self.getMetadata('author'))) + + metadata.appendChild(newTag(contentdom,"dc:contributor",text="fanficdownloader [http://fanficdownloader.googlecode.com]",attrs={"opf:role":"bkp"})) + metadata.appendChild(newTag(contentdom,"dc:rights",text="")) + if self.story.getMetadata('langcode') != None: + metadata.appendChild(newTag(contentdom,"dc:language",text=self.story.getMetadata('langcode'))) + else: + metadata.appendChild(newTag(contentdom,"dc:language",text='en')) + + # published, created, updated, calibre + # Leave calling self.story.getMetadataRaw directly in case date format changes. 
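+        # The three opf:event values written below (publication,
+        # creation, modification) are standard OPF 2.0 dc:date events;
+        # the calibre:timestamp meta is, as far as I know, the value
+        # calibre reads for its own Date column.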
+ if self.story.getMetadataRaw('datePublished'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"publication"}, + text=self.story.getMetadataRaw('datePublished').strftime("%Y-%m-%d"))) + + if self.story.getMetadataRaw('dateCreated'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"creation"}, + text=self.story.getMetadataRaw('dateCreated').strftime("%Y-%m-%d"))) + + if self.story.getMetadataRaw('dateUpdated'): + metadata.appendChild(newTag(contentdom,"dc:date", + attrs={"opf:event":"modification"}, + text=self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%d"))) + metadata.appendChild(newTag(contentdom,"meta", + attrs={"name":"calibre:timestamp", + "content":self.story.getMetadataRaw('dateUpdated').strftime("%Y-%m-%dT%H:%M:%S")})) + + if self.getMetadata('description'): + metadata.appendChild(newTag(contentdom,"dc:description",text= + self.getMetadata('description'))) + + for subject in self.getTags(): + metadata.appendChild(newTag(contentdom,"dc:subject",text=subject)) + + + if self.getMetadata('site'): + metadata.appendChild(newTag(contentdom,"dc:publisher", + text=self.getMetadata('site'))) + + if self.getMetadata('storyUrl'): + metadata.appendChild(newTag(contentdom,"dc:identifier", + attrs={"opf:scheme":"URL"}, + text=self.getMetadata('storyUrl'))) + metadata.appendChild(newTag(contentdom,"dc:source", + text=self.getMetadata('storyUrl'))) + + ## end of metadata, create manifest. + items = [] # list of (id, href, type, title) tuples(all strings) + itemrefs = [] # list of strings -- idrefs from .opfs' spines + items.append(("ncx","toc.ncx","application/x-dtbncx+xml",None)) ## we'll generate the toc.ncx file, + ## but it needs to be in the items manifest. + items.append(("style","OEBPS/stylesheet.css","text/css",None)) + if self.getConfig("include_titlepage"): + items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) + itemrefs.append("title_page") + if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly : + items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents")) + itemrefs.append("toc_page") + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + i=index+1 + items.append(("file%04d"%i, + "OEBPS/file%04d.xhtml"%i, + "application/xhtml+xml", + title)) + itemrefs.append("file%04d"%i) + + manifest = contentdom.createElement("manifest") + package.appendChild(manifest) + for item in items: + (id,href,type,title)=item + manifest.appendChild(newTag(contentdom,"item", + attrs={'id':id, + 'href':href, + 'media-type':type})) + + spine = newTag(contentdom,"spine",attrs={"toc":"ncx"}) + package.appendChild(spine) + for itemref in itemrefs: + spine.appendChild(newTag(contentdom,"itemref", + attrs={"idref":itemref, + "linear":"yes"})) + # write content.opf to zip. 
+ outputepub.writestr("content.opf",contentdom.toxml(encoding='utf-8')) + contentdom.unlink() + del contentdom + + ## create toc.ncx file + tocncxdom = getDOMImplementation().createDocument(None, "ncx", None) + ncx = tocncxdom.documentElement + ncx.setAttribute("version","2005-1") + ncx.setAttribute("xmlns","http://www.daisy.org/z3986/2005/ncx/") + head = tocncxdom.createElement("head") + ncx.appendChild(head) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:uid", "content":uniqueid})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:depth", "content":"1"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:totalPageCount", "content":"0"})) + head.appendChild(newTag(tocncxdom,"meta", + attrs={"name":"dtb:maxPageNumber", "content":"0"})) + + docTitle = tocncxdom.createElement("docTitle") + docTitle.appendChild(newTag(tocncxdom,"text",text=self.getMetadata('title'))) + ncx.appendChild(docTitle) + + tocnavMap = tocncxdom.createElement("navMap") + ncx.appendChild(tocnavMap) + + # + # + # + # + # + # + index=0 + for item in items: + (id,href,type,title)=item + # only items to be skipped, toc.ncx, stylesheet.css, should have no title. + if title : + navPoint = newTag(tocncxdom,"navPoint", + attrs={'id':id, + 'playOrder':str(index)}) + tocnavMap.appendChild(navPoint) + navLabel = newTag(tocncxdom,"navLabel") + navPoint.appendChild(navLabel) + ## the xml library will re-escape as needed. + navLabel.appendChild(newTag(tocncxdom,"text",text=stripHTML(title))) + navPoint.appendChild(newTag(tocncxdom,"content",attrs={"src":href})) + index=index+1 + + # write toc.ncs to zip file + outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8')) + tocncxdom.unlink() + del tocncxdom + + # write stylesheet.css file. + outputepub.writestr("OEBPS/stylesheet.css",self.EPUB_CSS.substitute(self.story.metadata)) + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.EPUB_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.EPUB_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.EPUB_TABLE_TITLE_WIDE_ENTRY + TITLE_PAGE_END = self.EPUB_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.EPUB_TITLE_PAGE_START + TITLE_ENTRY = self.EPUB_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.EPUB_TITLE_ENTRY # same, only wide in tables. + TITLE_PAGE_END = self.EPUB_TITLE_PAGE_END + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END) + if titlepageIO.getvalue(): # will be false if no title page. + outputepub.writestr("OEBPS/title_page.xhtml",titlepageIO.getvalue()) + titlepageIO.close() + + # write toc page. + tocpageIO = StringIO.StringIO() + self.writeTOCPage(tocpageIO, + self.EPUB_TOC_PAGE_START, + self.EPUB_TOC_ENTRY, + self.EPUB_TOC_PAGE_END) + if tocpageIO.getvalue(): # will be false if no toc page. + outputepub.writestr("OEBPS/toc_page.xhtml",tocpageIO.getvalue()) + tocpageIO.close() + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('

      ','

      \n').replace('
      ','
      \n') + outputepub.writestr("OEBPS/file%04d.xhtml"%(index+1),fullhtml.encode('utf-8')) + del fullhtml + + # declares all the files created by Windows. otherwise, when + # it runs in appengine, windows unzips the files as 000 perms. + for zf in outputepub.filelist: + zf.create_system = 0 + outputepub.close() + out.write(zipio.getvalue()) + zipio.close() + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py new file mode 100644 index 00000000..5b0b1ea0 --- /dev/null +++ b/fanficdownloader/writers/writer_html.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string + +from base_writer import * + +class HTMLWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'html' + + @staticmethod + def getFormatExt(): + return '.html' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.HTML_FILE_START = string.Template(''' + + + +${title} by ${author} + + + +

      ${title} by ${author}

      +''') + + self.HTML_TITLE_PAGE_START = string.Template(''' + +''') + + self.HTML_TITLE_ENTRY = string.Template(''' + +''') + + self.HTML_TITLE_PAGE_END = string.Template(''' +
      ${label}:${value}
      +''') + + self.HTML_TOC_PAGE_START = string.Template(''' +

      Table of Contents

      +

      +''') + + self.HTML_TOC_ENTRY = string.Template(''' +${chapter}
      +''') + + self.HTML_TOC_PAGE_END = string.Template(''' +

      +''') + + self.HTML_CHAPTER_START = string.Template(''' +

      ${chapter}

      +''') + + self.HTML_FILE_END = string.Template(''' + +''') + + + def writeStoryImpl(self, out): + + self._write(out,self.HTML_FILE_START.substitute(self.story.metadata)) + + self.writeTitlePage(out, + self.HTML_TITLE_PAGE_START, + self.HTML_TITLE_ENTRY, + self.HTML_TITLE_PAGE_END) + + self.writeTOCPage(out, + self.HTML_TOC_PAGE_START, + self.HTML_TOC_ENTRY, + self.HTML_TOC_PAGE_END) + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)})) + self._write(out,html) + + self._write(out,self.HTML_FILE_END.substitute(self.story.metadata)) diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py new file mode 100644 index 00000000..d6ced534 --- /dev/null +++ b/fanficdownloader/writers/writer_mobi.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO + +from base_writer import * +from ..htmlcleanup import stripHTML +from ..mobi import Converter + +class MobiWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'mobi' + + @staticmethod + def getFormatExt(): + return '.mobi' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.MOBI_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + +

      ${title} by ${author}

      +
      +''') + + self.MOBI_TITLE_ENTRY = string.Template(''' +${label}: ${value}
      +''') + + self.MOBI_TITLE_PAGE_END = string.Template(''' +
      + + + +''') + + self.MOBI_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + +

      ${title} by ${author}

      + +''') + + self.MOBI_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_PAGE_END = string.Template(''' +
      ${label}:${value}
      ${label}: ${value}
      + + + +''') + + self.MOBI_TOC_PAGE_START = string.Template(''' + + + +${title} by ${author} + + +
      +

      Table of Contents

      +''') + + self.MOBI_TOC_ENTRY = string.Template(''' +${chapter}
      +''') + + self.MOBI_TOC_PAGE_END = string.Template(''' +
      + + +''') + + self.MOBI_CHAPTER_START = string.Template(''' + + + +${chapter} + + +

      ${chapter}

      +''') + + self.MOBI_CHAPTER_END = string.Template(''' + + +''') + + def writeStoryImpl(self, out): + + files = [] + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.MOBI_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TABLE_TITLE_WIDE_ENTRY + TITLE_PAGE_END = self.MOBI_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.MOBI_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TITLE_ENTRY # same, only wide in tables. + TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END) + if titlepageIO.getvalue(): # will be false if no title page. + files.append(titlepageIO.getvalue()) + titlepageIO.close() + + ## MOBI always has a TOC injected by mobi.py because there's + ## no meta-data TOC. + # # write toc page. + # tocpageIO = StringIO.StringIO() + # self.writeTOCPage(tocpageIO, + # self.MOBI_TOC_PAGE_START, + # self.MOBI_TOC_ENTRY, + # self.MOBI_TOC_PAGE_END) + # if tocpageIO.getvalue(): # will be false if no toc page. + # files.append(tocpageIO.getvalue()) + # tocpageIO.close() + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text + # as one line. This causes problems for nook(at + # least) when the chapter size starts getting big + # (200k+) + fullhtml = fullhtml.replace('

      ','

      \n').replace('
      ','
      \n') + files.append(fullhtml.encode('utf-8')) + del fullhtml + + c = Converter(title=self.getMetadata('title'), + author=self.getMetadata('author'), + publisher=self.getMetadata('site')) + mobidata = c.ConvertStrings(files) + out.write(mobidata) + + del files + del mobidata + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py new file mode 100644 index 00000000..55cc7719 --- /dev/null +++ b/fanficdownloader/writers/writer_txt.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +from textwrap import wrap + +from base_writer import * + +from ..html2text import html2text, BODY_WIDTH + +## In BaseStoryWriter, we define _write to encode objects +## back into for true output. But txt needs to write the +## title page and TOC to a buffer first to wordwrap. And StringIO +## gets pissy about unicode bytes in its buflist. This decodes the +## unicode containing object passed in back to a +## object so they join up properly. Could override _write to not +## encode and do out.write(whatever.encode('utf8') instead. Honestly +## not sure which is uglier. +class KludgeStringIO(): + def __init__(self, buf = ''): + self.buflist=[] + def write(self,s): + try: + s=s.decode('utf-8') + except: + pass + self.buflist.append(s) + def getvalue(self): + return u''.join(self.buflist) + def close(self): + pass + +class TextWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'txt' + + @staticmethod + def getFormatExt(): + return '.txt' + + def __init__(self, config, story): + + BaseStoryWriter.__init__(self, config, story) + + self.TEXT_FILE_START = string.Template(u''' + + +${title} + +by ${author} + + +''') + + self.TEXT_TITLE_PAGE_START = string.Template(u''' +''') + + self.TEXT_TITLE_ENTRY = string.Template(u'''${label}: ${value} +''') + + self.TEXT_TITLE_PAGE_END = string.Template(u''' + + +''') + + self.TEXT_TOC_PAGE_START = string.Template(u''' + +TABLE OF CONTENTS + +''') + + self.TEXT_TOC_ENTRY = string.Template(u''' +${chapter} +''') + + self.TEXT_TOC_PAGE_END = string.Template(u''' +''') + + self.TEXT_CHAPTER_START = string.Template(u''' + +\t${chapter} + +''') + + self.TEXT_FILE_END = string.Template(u''' + +End file. 
+''') + + def writeStoryImpl(self, out): + + wrapout = KludgeStringIO() + + wrapout.write(self.TEXT_FILE_START.substitute(self.story.metadata)) + + self.writeTitlePage(wrapout, + self.TEXT_TITLE_PAGE_START, + self.TEXT_TITLE_ENTRY, + self.TEXT_TITLE_PAGE_END) + towrap = wrapout.getvalue() + + self.writeTOCPage(wrapout, + self.TEXT_TOC_PAGE_START, + self.TEXT_TOC_ENTRY, + self.TEXT_TOC_PAGE_END) + + towrap = wrapout.getvalue() + wrapout.close() + towrap = removeAllEntities(towrap) + + self._write(out,self.lineends(self.wraplines(towrap))) + + for index, (title,html) in enumerate(self.story.getChapters()): + if html: + logging.debug('Writing chapter text for: %s' % title) + self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1}))))) + self._write(out,self.lineends(html2text(html))) + + self._write(out,self.lineends(self.wraplines(self.TEXT_FILE_END.substitute(self.story.metadata)))) + + def wraplines(self, text): + result='' + for para in text.split("\n"): + first=True + for line in wrap(para, BODY_WIDTH): + if first: + first=False + else: + result += u"\n" + result += line + result += u"\n" + return result + + ## The appengine will return unix line endings. + def lineends(self, txt): + txt = txt.replace('\r','') + if self.getConfig("windows_eol"): + txt = txt.replace('\n',u'\r\n') + return txt + diff --git a/ffstorage.py b/ffstorage.py new file mode 100644 index 00000000..92e29d04 --- /dev/null +++ b/ffstorage.py @@ -0,0 +1,39 @@ +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from google.appengine.ext import db + +class DownloadMeta(db.Model): + user = db.UserProperty() + url = db.StringProperty() + name = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + format = db.StringProperty() + failure = db.TextProperty() + completed = db.BooleanProperty(default=False) + date = db.DateTimeProperty(auto_now_add=True) + version = db.StringProperty() + # data_chunks is implicit from DownloadData def. + +class DownloadData(db.Model): + download = db.ReferenceProperty(DownloadMeta, + collection_name='data_chunks') + blob = db.BlobProperty() + index = db.IntegerProperty() + +class UserConfig(db.Model): + user = db.UserProperty() + config = db.BlobProperty() diff --git a/index-ajax.html b/index-ajax.html new file mode 100644 index 00000000..62eba47c --- /dev/null +++ b/index-ajax.html @@ -0,0 +1,109 @@ + + + + + + + FanFictionDownLoader (fanfiction.net, fictionalley, ficwad to epub and HTML) + + + + + + + + + +
      +

      + FanFictionDownLoader +

      + + +
      +
      + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
      + +
      + Ebook format   +
      + +
      + +
      + + + +
      + + + +
      +
      + +

      + Login and Password +

      +
+ If the story requires a login and password to download (e.g. it is marked as Mature on FFA), you may need to provide your credentials; otherwise, just leave these fields empty. +
      +
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      +
      + + +
      + + +
      + +
      +
+ A few things to know that will make your life substantially easier: +
  +
1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+
2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+
3. Paste a URL of the first chapter of the fanfic, not the index page.
+
4. Fics with a single chapter are not supported (you can just copy and paste them).
+
5. Stories which are too long may not download correctly, and the application will report a time-out error — this is a limitation currently imposed by Google AppEngine on long-running activities.
+
6. FicWad support is somewhat flaky — if it doesn't work for you, send all the details to me.
+
7. You can download fanfics and store them for later by downloading them and then visiting the recent downloads section; in the future they will be deleted after 5 days to save space.
+
8. If the Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
+
9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
+
      + Otherwise, just have fun, and if you want to say thank you — use the email above. +
      +
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © Fanficdownloader team +
      + +
      + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..f43930d2 --- /dev/null +++ b/index.html @@ -0,0 +1,300 @@ + + + + + FanFictionDownLoader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + + + + + +
      +

      + FanFictionDownLoader +

      + +
      + + +
      + + {{yourfile}} + + + {% if authorized %} +
      +
      +
      +

      Hi, {{ nickname }}! This is FanFictionDownLoader, which makes reading stories from various websites + much easier.

      +
      + +

      fanfiction.net fixed

      +

+ fanfiction.net changed their formatting slightly, which broke the downloader for a while. It's fixed now. +

      +

      New Russian Language Site ficbook.net

      +

      + Thanks to Ida Leter's hard work, we now support ficbook.net, a Russian language fanfiction site. +

      +

      + If you have any problems with this application, please + report them in + the FanFictionDownLoader Google Group. The + Previous Version is also available for you to use if necessary. +

      +
      + {{ error_message }} +
      +
      + +
      +
      URL:
      +
      +
      Ebook format
      +
      + EPub + HTML + Plain Text + Mobi(Kindle) +
      +
      +
      + +

      For most readers, including Sony Reader, Nook and iPad, use EPub.

      +
      +
      +
      +

      + Customize your User Configuration. +

      +

      + Or see your personal list of previously downloaded fanfics. +

      +
      +
      + {% else %} +
      +
      +

+ This is FanFictionDownLoader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to log in, so FanFictionDownLoader can remember your fanfics and store them. +

      +

      Login using Google account

      +
      +
      + {% endif %} + +
      +

      + FanFictionDownLoader calibre Plugin +

      + + There's now a version of this downloader that runs + entirely inside the + popular calibre + ebook management package as a plugin. + +

      + + Once you have calibre installed and running, inside + calibre, you can go to 'Get plugins to enhance calibre' or + 'Get new plugins' and + install FanFictionDownLoader. + +

      +
      +
      +
      +
      fictionalley.org
      +
      + Use the URL of the story's chapter list, such as +
      http://www.fictionalley.org/authors/drt/DA.html. +
      Or a chapter URL (or one-shot text), such as +
      http://www.fictionalley.org/authors/drt/JOTP01a.html. +
      Both will work for both chaptered and one-shot stories now. +
      +
      fanfiction.net
      +
      + Use the URL of any story chapter, with or without story title such as +
      http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
      http://www.fanfiction.net/s/2345466/3/. +
      +
      fictionpress.com
      +
      + Use the URL of any story chapter, such as +
      http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
      http://www.fictionpress.com/s/2847338/6/. +
      +
      twilighted.net
      +
      + Use the URL of the start of the story, such as +
      http://twilighted.net/viewstory.php?sid=8422. +
      +
      twiwrite.net
      +
      + Use the URL of the start of the story, such as +
      http://twiwrite.net/viewstory.php?sid=427. +
      +
      ficwad.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.ficwad.com/story/74884. +
      Note that this is changed from the previous version. The system will still accept chapter URLs, however. +
      +
      harrypotterfanfiction.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
      +
      potionsandsnitches.net
      +
      + Use the URL of the story's chapter list, such as +
      http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
      +
      mediaminer.org
      +
      + Use the URL of the story's chapter list, such as +
      http://www.mediaminer.org/fanfic/view_st.php/166653. +
      Or the story URL for one-shots, such as +
      http://www.mediaminer.org/fanfic/view_st.php/167618 or +
      http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c +
      +
      adastrafanfic.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.adastrafanfic.com/viewstory.php?sid=854. +
      +
      whofic.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.whofic.com/viewstory.php?sid=16334. +
      +
      thewriterscoffeeshop.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110. +
      +
      fanfiction.tenhawkpresents.com
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.tenhawkpresents.com/viewstory.php?sid=294. +
      +
      castlefans.org
      +
      + Use the URL of the story's chapter list, such as +
      http://castlefans.org/fanfic/viewstory.php?sid=123. +
      +
      fimfiction.net
      +
      + Use the URL of the story's chapter list, such as +
      http://www.fimfiction.com/story/123/ +
      or the URL of any chapter, such as +
      http://www.fimfiction.com/story/123/1/. +
      +
      tthfanfic.org
      +
      + Use the URL of any story, with or without chapter, title and notice, such as +
      http://www.tthfanfic.org/Story-5583 +
      http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm. +
      http://www.tthfanfic.org/T-99999999/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm. +
      +
      www.siye.co.uk
      +
      + Use the URL of the story's chapter list, such as +
      http://www.siye.co.uk/siye/viewstory.php?sid=123. +
      +
      archiveofourown.org
      +
+ + Use the URL of the story, or one of its chapters, such as +<br>
      http://archiveofourown.org/works/76366. +
      http://archiveofourown.org/works/76366/chapters/101584. +
      +
      +
ficbook.net (Russian)
      +
+ + Use the URL of the story, or one of its chapters, such as +<br>
      http://ficbook.net/readfic/93626. +
      http://ficbook.net/readfic/93626/246417#part_content. +
      + +
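Taken together, the examples above amount to one story-URL pattern per site, each carrying a numeric story ID. Below is a minimal illustrative sketch of that idea in Python; the regexes are assumptions distilled from the sample URLs above, not the adapters' real getSiteURLPattern() values, and only three sites are shown.

import re

# Hypothetical patterns inferred from the sample URLs above; each real
# adapter defines its own getSiteURLPattern() instead.
STORY_URL_PATTERNS = {
    'fanfiction.net': re.compile(r'https?://www\.fanfiction\.net/s/(?P<storyId>\d+)(?:/\d+)?(?:/[^/]*)?$'),
    'twilighted.net': re.compile(r'https?://twilighted\.net/viewstory\.php\?sid=(?P<storyId>\d+)$'),
    'ficwad.com': re.compile(r'https?://www\.ficwad\.com/story/(?P<storyId>\d+)$'),
}

def story_id_for(url):
    """Return (site, storyId) for a recognized story URL, or None."""
    for site, pattern in STORY_URL_PATTERNS.items():
        match = pattern.match(url)
        if match:
            return site, match.group('storyId')
    return None

print(story_id_for('http://www.fanfiction.net/s/2345466/3/'))
# -> ('fanfiction.net', '2345466')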

      + A few additional things to know, which will make your life substantially easier: +

      +
        +
1. + First thing to know: I do not see your Google login and password. In fact, all I know about your account is your ID – your password + is verified by Google and is absolutely, totally unknown to anyone but you. </li>
      2. +
      3. + Small post written by me + — how to read fiction in Stanza or any other ebook reader. +
      4. +
      5. + You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader. +
      6. +
7. + Downloaded stories are deleted after some time (which should give you enough time to download them and will keep + Google happy about the app staying under the storage limit). </li>
      8. +
9. + If you see funny characters in a downloaded Plain Text file, make sure you open it with the UTF-8 text encoding and + not something else (see the short sketch after this list). </li>
      10. +
11. + If you think that something that should work in fact doesn't, post a message to + our Google Group. I also encourage you to join it so + you will find out about the latest updates and fixes as soon as possible. </li>
      12. +
      + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
      +
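On the UTF-8 point in item 9 above, here is a minimal self-contained sketch of what choosing the UTF-8 encoding means when opening a downloaded Plain Text file in code; 'story.txt' is a placeholder filename, not one the app produces.

import io

# 'story.txt' stands in for a downloaded Plain Text file. Decoding it
# explicitly as UTF-8 is what makes the "funny characters" render right.
with io.open('story.txt', encoding='utf-8') as f:
    text = f.read()
print(text[:200])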
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © FanFictionDownLoader team +
      + +
      + + +
      +
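The index.yaml diff below declares the composite datastore indexes that this app's queries require. For example, the DownloadMeta index on user (ascending) plus date (descending) backs the recent-downloads query in main.py later in this patch. A condensed extract of that query follows, assuming DownloadMeta is defined in ffstorage as main.py's star import suggests:

from google.appengine.api import users
from ffstorage import DownloadMeta  # the app's datastore model (assumed location)

user = users.get_current_user()
# Without the matching user-asc/date-desc composite index in index.yaml,
# the datastore rejects this filter + order combination.
q = DownloadMeta.all()
q.filter('user =', user).order('-date')
fics = q.fetch(100)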
      + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..16bcaefe --- /dev/null +++ b/index.yaml @@ -0,0 +1,33 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
      " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+ all : + front + ">"; + }); + + // Trim whitespace, otherwise indexOf won't work as expected + var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase(); + + var wrap = + // option or optgroup + !tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + tags.match(/^<(thead|tbody|tfoot|colg|cap)/) && + [ 1, "", "
      " ] || + + !tags.indexOf("", "" ] || + + // matched above + (!tags.indexOf("", "" ] || + + !tags.indexOf("", "" ] || + + // IE can't serialize and + + +
      +

      + FanFictionDownLoader +

      + +
      + + +
      + + {% if fic.failure %} +
      + {{ fic.failure }} +
      + {% endif %} +
      + + +
      + + {% if is_login %} + +

      Login and Password

      +
+ + {{ site }} requires a Login/Password for this story. + Enter your Login/Password for {{ site }} below + to download it. <br>
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      + + {% else %} + + + +
      +
      Are you an Adult?
      +
      + + {% endif %} + +
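For context on the is_login branch this template switches on: the server sets it from the adapter exception type. The following is a condensed, incomplete sketch of the handling in FanfictionDownloader.post (main.py, later in this patch), not a drop-in handler:

try:
    story = adapter.getStoryMetadataOnly()
except (exceptions.FailedToLogin, exceptions.AdultCheckRequired), e:
    template_values = dict(
        site=adapter.getSiteDomain(),
        fic=download,
        # True renders the Login/Password form above; False renders
        # only the "Are you an Adult?" checkbox.
        is_login=isinstance(e, exceptions.FailedToLogin),
    )
    # thewriterscoffeeshop.com can require login *and* an adult check,
    # so login/password are passed through for AdultCheckRequired too.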
      + +
      + +
      +
      + +
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © FanFictionDownLoader team +
      + +
      + + +
      +
      + + diff --git a/main.py b/main.py new file mode 100644 index 00000000..50da6652 --- /dev/null +++ b/main.py @@ -0,0 +1,570 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logging.getLogger().setLevel(logging.DEBUG) + +import os +from os.path import dirname, basename, normpath +import re +import sys +import zlib +import urllib +import datetime + +import traceback +from StringIO import StringIO +import ConfigParser + +## Just to shut up the appengine warning about "You are using the +## default Django version (0.96). The default Django version will +## change in an App Engine release in the near future. Please call +## use_library() to explicitly select a Django version. For more +## information see +## http://code.google.com/appengine/docs/python/tools/libraries.html#Django" +## Note that if you are using the SDK App Engine Launcher and hit an SDK +## Console page first, you will get a django version mismatch error when you +## to go hit one of the application pages. Just change a file again, and +## make sure to hit an app page before the SDK page to clear it. +#os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' +#from google.appengine.dist import use_library +#use_library('django', '1.2') + +from google.appengine.ext import db +from google.appengine.api import taskqueue +from google.appengine.api import users +#from google.appengine.ext import webapp +import webapp2 +from google.appengine.ext.webapp import template +#from google.appengine.ext.webapp2 import util +from google.appengine.runtime import DeadlineExceededError + +from ffstorage import * + +from fanficdownloader import adapters, writers, exceptions + +class UserConfigServer(webapp2.RequestHandler): + def getUserConfig(self,user): + config = ConfigParser.SafeConfigParser() + + logging.debug('reading defaults.ini config file') + config.read('defaults.ini') + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l and l[0].config: + uconfig=l[0] + #logging.debug('reading config from UserConfig(%s)'%uconfig.config) + config.readfp(StringIO(uconfig.config)) + + return config + +class MainHandler(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if user: + error = self.request.get('error') + template_values = {'nickname' : user.nickname(), 'authorized': True} + url = self.request.get('url') + template_values['url'] = url + + if error: + if error == 'login_required': + template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' 
+ elif error == 'bad_url': + template_values['error_message'] = 'Unsupported URL: ' + url + elif error == 'custom': + template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') + elif error == 'configsaved': + template_values['error_message'] = 'Configuration Saved' + elif error == 'recentcleared': + template_values['error_message'] = 'Your Recent Downloads List has been Cleared' + + filename = self.request.get('file') + if len(filename) > 1: + template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) + + self.response.headers['Content-Type'] = 'text/html' + path = os.path.join(os.path.dirname(__file__), 'index.html') + + self.response.out.write(template.render(path, template_values)) + else: + logging.debug(users.create_login_url('/')) + url = users.create_login_url(self.request.uri) + template_values = {'login_url' : url, 'authorized': False} + path = os.path.join(os.path.dirname(__file__), 'index.html') + self.response.out.write(template.render(path, template_values)) + + +class EditConfigServer(UserConfigServer): + def get(self): + self.post() + + def post(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + template_values = {'nickname' : user.nickname(), 'authorized': True} + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l: + uconfig=l[0] + else: + uconfig=None + + if self.request.get('update'): + if uconfig is None: + uconfig = UserConfig() + uconfig.user = user + uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case. + uconfig.put() + try: + config = self.getUserConfig(user) + self.redirect("/?error=configsaved") + except Exception, e: + logging.info("Saved Config Failed:%s"%e) + self.redirect("/?error=custom&errtext=%s"%urlEscape(str(e))) + else: # not update, assume display for edit + if uconfig is not None and uconfig.config: + config = uconfig.config + else: + configfile = open("example.ini","rb") + config = configfile.read() + configfile.close() + template_values['config'] = config + + configfile = open("defaults.ini","rb") + config = configfile.read() + configfile.close() + template_values['defaultsini'] = config + + path = os.path.join(os.path.dirname(__file__), 'editconfig.html') + self.response.headers['Content-Type'] = 'text/html' + self.response.out.write(template.render(path, template_values)) + + +class FileServer(webapp2.RequestHandler): + + def get(self): + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + return + + try: + download = getDownloadMeta(id=fileId) + + name = download.name.encode('utf-8') + + logging.info("Serving file: %s" % name) + + if name.endswith('.epub'): + self.response.headers['Content-Type'] = 'application/epub+zip' + elif name.endswith('.html'): + self.response.headers['Content-Type'] = 'text/html' + elif name.endswith('.txt'): + self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.mobi'): + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name + + data = DownloadData.all().filter("download =", download).order("index") + # epubs are all already compressed. 
+ # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if download.format != 'epub': + def dc(data): + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compessing. + except zlib.error: + return data + else: + def dc(data): + return data + + for datum in data: + self.response.out.write(dc(datum.blob)) + + except Exception, e: + fic = DownloadMeta() + fic.failure = unicode(e) + + template_values = dict(fic = fic, + #nickname = user.nickname(), + #escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class FileStatusServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + escaped_url=False + + try: + download = getDownloadMeta(id=fileId) + + if download: + logging.info("Status url: %s" % download.url) + if download.completed and download.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format) + else: + download = DownloadMeta() + download.failure = "Download not found" + + except Exception, e: + download = DownloadMeta() + download.failure = unicode(e) + + template_values = dict(fic = download, + nickname = user.nickname(), + escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class ClearRecentServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + logging.info("Clearing Recent List for user: "+user.nickname()) + q = DownloadMeta.all() + q.filter('user =', user) + num=0 + while( True ): + results = q.fetch(100) + if results: + for d in results: + d.delete() + for c in d.data_chunks: + c.delete() + num = num + 1 + logging.debug('Delete '+d.url) + else: + break + logging.info('Deleted %d instances download.' 
% num) + self.redirect("/?error=recentcleared") + +class RecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = DownloadMeta.all() + q.filter('user =', user).order('-date') + fics = q.fetch(100) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) + + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + +class FanfictionDownloader(UserConfigServer): + def get(self): + self.post() + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + format = self.request.get('format') + url = self.request.get('url') + + if not url or url.strip() == "": + self.redirect('/') + return + + logging.info("Queuing Download: %s" % url) + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') == "on" + + # use existing record if available. Fetched/Created before + # the adapter can normalize the URL in case we need to record + # an exception. + download = getDownloadMeta(url=url,user=user,format=format,new=True) + + adapter = None + try: + try: + config = self.getUserConfig(user) + except Exception, e: + self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e))) + return + + adapter = adapters.getAdapter(config,url) + logging.info('Created an adaper: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + ## This scrapes the metadata, which will be + ## duplicated in the queue task, but it + ## detects bad URLs, bad login, bad story, etc + ## without waiting for the queue. So I think + ## it's worth the double up. Could maybe save + ## it all in the download object someday. + story = adapter.getStoryMetadataOnly() + + ## Fetch again using normalized story URL. The one + ## fetched/created above, if different, will not be saved. + download = getDownloadMeta(url=story.getMetadata('storyUrl'), + user=user,format=format,new=True) + + download.title = story.getMetadata('title') + download.author = story.getMetadata('author') + download.url = story.getMetadata('storyUrl') + download.put() + + taskqueue.add(url='/fdowntask', + queue_name="download", + params={'id':str(download.key()), + 'format':format, + 'url':download.url, + 'login':login, + 'password':password, + 'user':user.email(), + 'is_adult':is_adult}) + + logging.info("enqueued download key: " + str(download.key())) + + except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: + download.failure = unicode(e) + download.put() + logging.info(unicode(e)) + is_login= ( isinstance(e, exceptions.FailedToLogin) ) + template_values = dict(nickname = user.nickname(), + url = url, + format = format, + site = adapter.getSiteDomain(), + fic = download, + is_login=is_login, + ) + # thewriterscoffeeshop.com can do adult check *and* user required. 
+ if isinstance(e,exceptions.AdultCheckRequired): + template_values['login']=login + template_values['password']=password + + path = os.path.join(os.path.dirname(__file__), 'login.html') + self.response.out.write(template.render(path, template_values)) + return + except (exceptions.InvalidStoryURL,exceptions.UnknownSite,exceptions.StoryDoesNotExist), e: + logging.warn(unicode(e)) + download.failure = unicode(e) + download.put() + except Exception, e: + logging.error("Failure Queuing Download: url:%s" % url) + logging.exception(e) + download.failure = unicode(e) + download.put() + + self.redirect('/status?id='+str(download.key())) + + return + + +class FanfictionDownloaderTask(UserConfigServer): + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + fileId = self.request.get('id') + # User object can't pass, just email address + user = users.User(self.request.get('user')) + format = self.request.get('format') + url = self.request.get('url') + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') + + logging.info("Downloading: " + url + " for user: "+user.nickname()) + logging.info("ID: " + fileId) + + adapter = None + writerClass = None + + # use existing record if available. + # fileId should have record from /fdown. + download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True) + for c in download.data_chunks: + c.delete() + download.put() + + logging.info('Creating adapter...') + + try: + config = self.getUserConfig(user) + adapter = adapters.getAdapter(config,url) + + logging.info('Created an adapter: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + # adapter.getStory() is what does all the heavy lifting. + # adapter.getStoryMetadataOnly() only fetches enough to + # get metadata. writer.writeStory() will call + # adapter.getStory(), too. + writer = writers.getWriter(format,config,adapter) + download.name = writer.getOutputFileName() + #logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.url = adapter.getStory().getMetadata('storyUrl') + download.put() + + outbuffer = StringIO() + writer.writeStory(outbuffer) + data = outbuffer.getvalue() + outbuffer.close() + del outbuffer + #del writer.adapter + #del writer.story + del writer + #del adapter.story + del adapter + + # epubs are all already compressed. Each chunk is + # compressed individually to avoid having to hold the + # whole in memory just for the compress/uncompress. + if format != 'epub': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data + + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=c(data[:1000000])).put() + index += 1 + data = data[1000000:] + download.completed=True + download.put() + + logging.info("Download finished OK") + del data + + except Exception, e: + logging.exception(e) + download.failure = unicode(e) + download.put() + return + + return + +def getDownloadMeta(id=None,url=None,user=None,format=None,new=False): + ## try to get download rec from passed id first. 
then fall back + ## to user/url/format + download = None + if id: + try: + download = db.get(db.Key(id)) + logging.info("DownloadMeta found by ID:"+id) + except: + pass + + if not download and url and user and format: + try: + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is not None and len(q) > 0 ): + logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format)) + download = q[0] + except: + pass + + if new: + # NOT clearing existing chunks here, because this record may + # never be saved. + if not download: + logging.debug("New DownloadMeta") + download = DownloadMeta() + + download.completed=False + download.failure=None + download.date=datetime.datetime.now() + + download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID']) + if user: + download.user = user + if url: + download.url = url + if format: + download.format = format + + return download + +def toPercentDecimal(match): + "Return the %decimal number for the character for url escaping" + s = match.group(1) + return "%%%02x" % ord(s) + +def urlEscape(data): + "Escape text, including unicode, for use in URLs" + p = re.compile(r'([^\w])') + return p.sub(toPercentDecimal, data.encode("utf-8")) + +logging.getLogger().setLevel(logging.DEBUG) +app = webapp2.WSGIApplication([('/', MainHandler), + ('/fdowntask', FanfictionDownloaderTask), + ('/fdown', FanfictionDownloader), + (r'/file.*', FileServer), + ('/status', FileStatusServer), + ('/recent', RecentFilesServer), + ('/editconfig', EditConfigServer), + ('/clearrecent', ClearRecentServer), + ], + debug=False) diff --git a/makeplugin.py b/makeplugin.py new file mode 100644 index 00000000..e4abac41 --- /dev/null +++ b/makeplugin.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from glob import glob + +from makezip import createZipFile + +if __name__=="__main__": + filename="FanFictionDownLoader.zip" + exclude=['*.pyc','*~','*.xcf'] + # from top dir. 'w' for overwrite + createZipFile(filename,"w", + ['plugin-defaults.ini','plugin-example.ini','epubmerge.py','fanficdownloader'], + exclude=exclude) + #from calibre-plugin dir. 'a' for append + os.chdir('calibre-plugin') + files=['about.txt','images',] + files.extend(glob('*.py')) + files.extend(glob('plugin-import-name-*.txt')) + createZipFile("../"+filename,"a", + files,exclude=exclude) diff --git a/makezip.py b/makezip.py new file mode 100644 index 00000000..55a10197 --- /dev/null +++ b/makezip.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os, zipfile, sys +from glob import glob + +def addFolderToZip(myZipFile,folder,exclude=[]): + folder = folder.encode('ascii') #convert path to ascii for ZipFile Method + excludelist=[] + for ex in exclude: + excludelist.extend(glob(folder+"/"+ex)) + for file in glob(folder+"/*"): + if file in excludelist: + continue + if os.path.isfile(file): + #print file + myZipFile.write(file, file, zipfile.ZIP_DEFLATED) + elif os.path.isdir(file): + addFolderToZip(myZipFile,file,exclude=exclude) + +def createZipFile(filename,mode,files,exclude=[]): + myZipFile = zipfile.ZipFile( filename, mode ) # Open the zip file for writing + excludelist=[] + for ex in exclude: + excludelist.extend(glob(ex)) + for file in files: + if file in excludelist: + continue + file = file.encode('ascii') #convert path to ascii for ZipFile Method + if os.path.isfile(file): + (filepath, filename) = os.path.split(file) + #print file + myZipFile.write( file, filename, zipfile.ZIP_DEFLATED ) + if os.path.isdir(file): + addFolderToZip(myZipFile,file,exclude=exclude) + myZipFile.close() + return (1,filename) + diff --git a/plugin-defaults.ini b/plugin-defaults.ini new file mode 100644 index 00000000..55eed582 --- /dev/null +++ b/plugin-defaults.ini @@ -0,0 +1,343 @@ +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +[defaults] + +## [defaults] section applies to all formats and sites but may be +## overridden at several levels + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## All available titlepage_entries and the label used for them: +## _label:
    2. + Small post written by me — how to read fiction in Stanza or any other ebook reader.
    3. diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 60cd9a41..2dedcee0 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -292,9 +292,16 @@ extratags: FanFiction,Testing,HTML #is_adult:true [fanfiction.mugglenet.com] -## Some sites do not require a login, but do require the user to -## confirm they are adult for adult content. In commandline version, -## this should go in your personal.ini, not defaults.ini. +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. #is_adult:true [fanfiction.portkey.org] From 53c80e3c71e2bab829e8ba3843735281f3e26d10 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 19 Mar 2012 21:06:10 -0500 Subject: [PATCH 427/482] Added tag calibre-plugin-1.5.10 for changeset 4a7cf5c4919c From a9e873490d1ce25f60153f256733b0ddfdacc2de Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 19 Mar 2012 21:06:20 -0500 Subject: [PATCH 428/482] Added tag FanFictionDownLoader-4.4.4 for changeset 4a7cf5c4919c From ad092e2d5f3c37336a441c3f3e7e8b30e7ea0741 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 24 Mar 2012 12:09:18 -0500 Subject: [PATCH 429/482] Another work around for SGMLParser's entity handling. —again v —a;gain --- calibre-plugin/__init__.py | 2 +- fanficdownloader/adapters/base_adapter.py | 7 +++---- fanficdownloader/htmlcleanup.py | 16 ++++++++++------ fanficdownloader/story.py | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index a65dec79..00457da5 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 10) + version = (1, 5, 11) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 35d6d035..5875ec0d 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -331,10 +331,6 @@ class BaseSiteAdapter(Configurable): # This is primarily for epub updates. return re.sub(r"\r?\n?","",retval) -fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05", - "June":"06","July":"07", "August":"08", "September":"09", "October":"10", - "November":"11", "December":"12" } - def cachedfetch(realfetch,cache,url): if url in cache: print("cache hit") @@ -342,6 +338,9 @@ def cachedfetch(realfetch,cache,url): else: return realfetch(url) +fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05", + "June":"06","July":"07", "August":"08", "September":"09", "October":"10", + "November":"11", "December":"12" } def makeDate(string,format): # Surprise! 
Abstracting this turned out to be more useful than diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py index d9e2d848..2e223e73 100644 --- a/fanficdownloader/htmlcleanup.py +++ b/fanficdownloader/htmlcleanup.py @@ -23,11 +23,16 @@ def _unirepl(match): radix=16 else: radix=10 - value = int(match.group(2), radix ) - return unichr(value) + value = int(match.group(2), radix) + return "%s%s"%(unichr(value),match.group(3)) def _replaceNumberEntities(data): - p = re.compile(r'&#(x?)([0-9a-fA-F]+);') + # The same brokenish entity parsing in SGMLParser that inserts ';' + # after non-entities will also insert ';' incorrectly after number + # entities, including part of the next word if it's a-z. + # "Don't—ever—do—that—again," becomes + # "Don't—e;ver—d;o—that—a;gain," + p = re.compile(r'&#(x?)([0-9a-fA-F]{,4})([0-9a-fA-F]*);') return p.sub(_unirepl, data) def _replaceNotEntities(data): @@ -50,9 +55,6 @@ def removeAllEntities(text): return removeEntities(text).replace('<', '<').replace('>', '>').replace('&', '&') def removeEntities(text): - - # replace numeric versions of [&<>] with named versions, - # then replace named versions with actual characters, if text is None: return "" @@ -67,6 +69,8 @@ def removeEntities(text): except UnicodeEncodeError, e: t = text text = t + # replace numeric versions of [&<>] with named versions, + # then replace named versions with actual characters, text = re.sub(r'�*38;','&',text) text = re.sub(r'�*60;','<',text) text = re.sub(r'�*62;','>',text) diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index 555cde37..c8f06a04 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -216,7 +216,7 @@ class Story: for (p,v) in self.replacements: if (isinstance(value,str) or isinstance(value,unicode)) and re.match(p,value): value = re.sub(p,v,value) - return value; + return value def getMetadata(self, key, removeallentities=False): value = None From d5f1484cb7739e0cb20ab4cfa81d4ba66d4634cb Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 24 Mar 2012 12:09:31 -0500 Subject: [PATCH 430/482] Added tag calibre-plugin-1.5.11 for changeset 5ad254071cc4 From ce8cf441014f0ede7ed4ef037b5ef86a1a64f620 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 9 Apr 2012 11:34:52 -0500 Subject: [PATCH 431/482] Set calibre's author link field to authorUrl. --- calibre-plugin/__init__.py | 2 +- calibre-plugin/ffdl_plugin.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 00457da5..e43518dd 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 11) + version = (1, 5, 12) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 322758d1..ab7f03fd 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -768,7 +768,13 @@ make_firstimage_cover:true if epubmi.cover_data[1] is not None: db.set_cover(book_id, epubmi.cover_data[1]) #mi.cover = epubmi.cover_data[1] - + + # set author link if found. All current adapters have authorUrl. 
+ if 'authorUrl' in book['all_metadata']: + autid=db.get_author_id(book['author']) + db.set_link_field_for_author(autid, unicode(book['all_metadata']['authorUrl']), + commit=False, notify=False) + db.set_metadata(book_id,mi) # do configured column updates here. From a5fd8646b74f25881c36af13eb2959054fd8e2fa Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 9 Apr 2012 11:35:25 -0500 Subject: [PATCH 432/482] Added tag calibre-plugin-1.5.12 for changeset 6145294332cd From e647b0e69b806fff76b7f324b83a5307fa48eef8 Mon Sep 17 00:00:00 2001 From: Ida Date: Tue, 10 Apr 2012 01:59:34 -0400 Subject: [PATCH 433/482] First version adapter for archive.skyehawke.com --- .../adapters/adapter_archiveskyehawkecom.py | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 fanficdownloader/adapters/adapter_archiveskyehawkecom.py diff --git a/fanficdownloader/adapters/adapter_archiveskyehawkecom.py b/fanficdownloader/adapters/adapter_archiveskyehawkecom.py new file mode 100644 index 00000000..999d53b4 --- /dev/null +++ b/fanficdownloader/adapters/adapter_archiveskyehawkecom.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + + +def getClass(): + return ArchiveSkyeHawkeComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','ash') + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y-%m-%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'archive.skyehawke.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/story.php?no=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/story.php?no=")+r"\d+$" + + + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + data = self._fetchUrl(url) + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('div', {'class':"story border"}).find('span',{'class':'left'}) + title=a.text.split('"')[1] + self.story.setMetadata('title',title) + + # Find authorid and URL from... author url. + author = a.find('a') + self.story.setMetadata('authorId',author['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href']) + self.story.setMetadata('author',author.string) + + authorSoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + chapter=soup.find('select',{'name':'chapter'}).findAll('option') + + for i in range(1,len(chapter)): + ch=chapter[i] + self.chapterUrls.append((stripHTML(ch),ch['value'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + box=soup.find('div', {'class': "container borderridge"}) + sum=box.find('span').text + self.setDescription(url,sum) + + boxes=soup.findAll('div', {'class': "container bordersolid"}) + for box in boxes: + if box.find('b') != None and box.find('b').text == "History and Story Information": + + for b in box.findAll('b'): + if "words" in b.nextSibling: + self.story.setMetadata('numWords', b.text) + if "archived" in b.previousSibling: + self.story.setMetadata('datePublished', makeDate(stripHTML(b.text), self.dateformat)) + if "updated" in b.previousSibling: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(b.text), self.dateformat)) + if "fandom" in b.nextSibling: + self.story.addToList('category', b.text) + + for br in box.findAll('br'): + br.replaceWith('split') + genre=box.text.split("Genre:")[1].split("split")[0] + if not "Unspecified" in genre: + self.story.addToList('genre',genre) + + + if box.find('span') != None and box.find('span').text == "WARNING": + + rating=box.findAll('span')[1] + rating.find('br').replaceWith('split') + rating=rating.text.replace("This story is rated",'').split('split')[0] + self.story.setMetadata('rating',rating) + logging.debug(self.story.getMetadata('rating')) + + warnings=box.find('ol') + if warnings != None: + warnings=warnings.text.replace(']', '').replace('[', '').split(' ') + for warning in warnings: + self.story.addToList('warnings',warning) + + + for asoup in authorSoup.findAll('div', {'class':"story bordersolid"}): + if asoup.find('a')['href'] == 'story.php?no='+self.story.getMetadata('storyId'): + if '[ Completed ]' in asoup.text: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + chars=asoup.findNext('div').text.split('Characters')[1].split(']')[0] + for char in chars.split(','): + if not "None" in char: + self.story.addToList('characters',char) + break + + + + # grab the text for 
an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div',{'class':"chapter bordersolid"}).findNext('div').findNext('div') + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) From b4a8361df92b6aae22309bccbc3d055da36d02af Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 10 Apr 2012 09:42:01 -0500 Subject: [PATCH 434/482] Package Ida's new archive.skyehawke.com adapter for release. --- app.yaml | 2 +- calibre-plugin/__init__.py | 2 +- fanficdownloader/adapters/__init__.py | 1 + index.html | 23 ++++++++++++----------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/app.yaml b/app.yaml index 161ef428..5b75406c 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-4 +version: 4-4-5 runtime: python27 api_version: 1 threadsafe: true diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index e43518dd..4dc026a9 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 12) + version = (1, 5, 13) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index 3bc57c90..e8eeb85d 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -51,6 +51,7 @@ import adapter_thequidditchpitchorg import adapter_nfacommunitycom import adapter_midnightwhispersca import adapter_ksarchivecom +import adapter_archiveskyehawkecom ## This bit of complexity allows adapters to be added by just adding ## importing. It eliminates the long if/else clauses we used to need diff --git a/index.html b/index.html index f65edd79..eb193e75 100644 --- a/index.html +++ b/index.html @@ -56,17 +56,17 @@
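Editor's note on the getChapterText() above: it passes selfClosingTags=('br','hr') because the bundled BeautifulSoup 3.x otherwise treats those tags as containers and nests the following text inside them. A minimal sketch of the difference (Python 2, using the project's bundled BeautifulSoup module):

from BeautifulSoup import BeautifulStoneSoup

markup = "<div>one<br>two<hr>three</div>"

eaten = BeautifulStoneSoup(markup)
print eaten.br.contents    # the text after <br> ends up nested inside it

fixed = BeautifulStoneSoup(markup, selfClosingTags=('br', 'hr'))
print fixed.br.contents    # [] -- <br> closes immediately; text stays in the <div>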

      New Site Added

      - Support for ksarchive.com has been added. Thanks for Jade AislinSam implementing this. + Support for archive.skyehawke.com has been added. Thanks to Ida Leter for implementing this.

      - Questions? Check out our new - FAQs. Thanks to Wyndham for writing these. + Questions? Check out our + FAQs.

      If you have any problems with this application, please report them in the FanFictionDownLoader Google Group. The - Previous Version is also available for you to use if necessary. + Previous Version is also available for you to use if necessary.

      {{ error_message }} @@ -248,10 +248,6 @@
      http://ficbook.net/readfic/93626.
      http://ficbook.net/readfic/93626/246417#part_content. -
      gayauthors.org
      -
      - Removed following complaints by the site administration. -
      fanfiction.mugglenet.com
      Use the URL of the story's chapter list, such as @@ -287,18 +283,23 @@ Use the URL of the story's chapter list, such as
      http://ksarchive.com/viewstory.php?sid=1124.
      +
      archive.skyehawke.com
      +
      + Use the URL of the story's summary, such as +
      http://archive.skyehawke.com/story.php?no=17466. +
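Editor's note: the adapter's getSiteURLPattern() from patch 433 enforces exactly this URL shape; a quick illustration of the check (a sketch, Python 2 like the rest of the codebase):

import re

SITE = 'archive.skyehawke.com'
# Mirrors ArchiveSkyeHawkeComAdapter.getSiteURLPattern() shown earlier.
pattern = re.escape("http://" + SITE + "/story.php?no=") + r"\d+$"

print bool(re.match(pattern, "http://archive.skyehawke.com/story.php?no=17466"))  # True
print bool(re.match(pattern, "http://archive.skyehawke.com/user.php?id=5"))       # False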

      A few additional things to know, which will make your life substantially easier:

      1. - First thing to know: I do not use your Google login and password. In fact, all I know about it is your ID – password + First thing to know: We do not use your Google login and password. In fact, all we know about it is your ID – password is being verified by Google and is absolutely, totally unknown to anyone but you.
      2. - Small post written by me + Small post written by Roman — how to read fiction in Stanza or any other ebook reader.
      3. @@ -314,7 +315,7 @@
4. If you think that something that should work in fact doesn't, post a message to - our Google Group. I also encourage you to join it so + our Google Group. We also encourage you to join it so you will find out about the latest updates and fixes as soon as possible
      From 1f3220a221e657b07e035a90f3ed2a6269828d5f Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 10 Apr 2012 09:42:32 -0500 Subject: [PATCH 435/482] Added tag calibre-plugin-1.5.13 for changeset b0720fc31c10 From bd477d26962fc11ba3c820e6f5283e0c77e5b11d Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 10 Apr 2012 09:43:20 -0500 Subject: [PATCH 436/482] Added tag FanFictionDownLoader-4.4.5 for changeset b0720fc31c10 From 08aa67785581c867d09c5edeaf885e8d0c51d72c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 11 Apr 2012 11:11:25 -0500 Subject: [PATCH 437/482] Fix for Python GC'ing menu objects with no explicit Python references. --- calibre-plugin/ffdl_plugin.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index ab7f03fd..a83806e7 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -109,6 +109,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction): # Assign our menu to this action self.menu = QMenu(self.gui) self.old_actions_unique_map = {} + # menu_actions is just to keep a live reference to the menu + # items to prevent GC removing it. + self.menu_actions = [] self.qaction.setMenu(self.menu) self.menu.aboutToShow.connect(self.about_to_show_menu) @@ -135,6 +138,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): do_user_config = self.interface_action_base_plugin.do_user_config self.menu.clear() self.actions_unique_map = {} + self.menu_actions = [] self.add_action = self.create_menu_item_ex(self.menu, '&Add New from URL(s)', image='plus.png', unique_name='Add New FanFiction Book(s) from URL(s)', shortcut_name='Add New FanFiction Book(s) from URL(s)', @@ -227,6 +231,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): ac = create_menu_action_unique(self, parent_menu, menu_text, image, tooltip, shortcut, triggered, is_checked, shortcut_name, unique_name) self.actions_unique_map[ac.calibre_shortcut_unique_name] = ac.calibre_shortcut_unique_name + self.menu_actions.append(ac) return ac def plugin_button(self): From 01b14701a055abc4418d6b8dd327333660bc1866 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 11 Apr 2012 11:11:43 -0500 Subject: [PATCH 438/482] Moved tag calibre-plugin-1.5.13 to changeset b55fabb0b8c9 (from changeset b0720fc31c10) From c2d7b4ecce8ac3571e3d9d9075a8a834de294f82 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 17 Apr 2012 14:07:19 -0500 Subject: [PATCH 439/482] Change plugin 'images' checkbox to add to [epubs], kludge for fimfiction images --- calibre-plugin/__init__.py | 2 +- calibre-plugin/config.py | 2 +- calibre-plugin/ffdl_plugin.py | 2 +- fanficdownloader/adapters/adapter_fimfictionnet.py | 7 +++++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 4dc026a9..8a075343 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' 
supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 13) + version = (1, 5, 14) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 04f74f87..5d33643b 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -283,7 +283,7 @@ class BasicTab(QWidget): # this is a cheat to make it easier for users to realize there's a new include_images features. self.includeimages = QCheckBox("Include images in EPUBs?",self) - self.includeimages.setToolTip("Download and include images in EPUB stories. This is equivalent to adding:\n\n[defaults]\ninclude_images:true\nkeep_summary_html:true\nmake_firstimage_cover:true\n\n ...to the top of personal.ini. Your settings in personal.ini will override this.") + self.includeimages.setToolTip("Download and include images in EPUB stories. This is equivalent to adding:\n\n[epub]\ninclude_images:true\nkeep_summary_html:true\nmake_firstimage_cover:true\n\n ...to the top of personal.ini. Your settings in personal.ini will override this.") self.includeimages.setChecked(prefs['includeimages']) self.l.addWidget(self.includeimages) diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index a83806e7..6be7a618 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -420,7 +420,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction): options['personal.ini'] = prefs['personal.ini'] if prefs['includeimages']: # this is a cheat to make it easier for users. - options['personal.ini'] = '''[defaults] + options['personal.ini'] = '''[epub] include_images:true keep_summary_html:true make_firstimage_cover:true diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py index cc58b510..a56870b7 100644 --- a/fanficdownloader/adapters/adapter_fimfictionnet.py +++ b/fanficdownloader/adapters/adapter_fimfictionnet.py @@ -155,11 +155,14 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter): description_soup.find('a', {"class":"more"}).extract() except: pass - + # fimfic is the first site with an explicit cover image. story_img = soup.find('img',{'class':'story_image'}) if self.getConfig('include_images') and story_img: - self.story.addImgUrl(self,self.url,story_img['src'],self._fetchUrlRaw,cover=True) + coverurl = story_img['src'] + if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:' + coverurl = "http:"+coverurl + self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True) self.setDescription(self.url,description_soup.text) #self.story.setMetadata('description', description_soup.text) From 6299fc7582c618139dad469d6730672fbbf9990a Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 17 Apr 2012 14:07:29 -0500 Subject: [PATCH 440/482] Added tag calibre-plugin-1.5.14 for changeset b69d6004ae25 From 04eb551d48c09d0e07e0304b7ed2aca59cd094d2 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 17 Apr 2012 14:07:50 -0500 Subject: [PATCH 441/482] Added tag CLI-4.4.6 for changeset b69d6004ae25 From 6f4be261ef19eb1b00f77f7d63665b2a03340683 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 18 Apr 2012 17:54:42 -0500 Subject: [PATCH 442/482] Fix Japanese in CLI & web version. Broke it in 443 and didn't notice. 
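Editor's note: the fimfictionnet hunk above guards against protocol-relative image URLs ('//static.fimfiction.net/...'), which urllib2 cannot fetch without an explicit scheme. A tiny sketch of that normalization (hypothetical helper name; the patch itself just prepends "http:" inline):

def fix_protocol_relative(url, scheme="http:"):
    # '//host/path' is scheme-relative; urllib2 needs a full scheme.
    return scheme + url if url.startswith('//') else url

print fix_protocol_relative('//static.fimfiction.net/images/story_images/x.jpg')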
--- fanficdownloader/adapters/adapter_test1.py | 4 ++-- fanficdownloader/htmlcleanup.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 0d5cb109..0496227b 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -179,9 +179,9 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"

      Chapter title from site

      Centered text

      -

      Lorem '''+self.crazystring+''' italics, bold, underline consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

      +

      Lorem '''+self.crazystring+u''' italics, bold, underline consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

      br breaks

      - +Puella Magi Madoka Magica/魔法少女まどか★マギカ
      br breaks


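Editor's note: the htmlcleanup.py hunk just below widens the number-entity pattern once more, this time to admit 5-digit decimal entities (needed for CJK characters such as the 法 in its comment). Taken together with the patch 429 workaround, the repair works roughly like this sketch (Python 2, matching the codebase; function names are illustrative):

import re

def _unirepl(match):
    # group(2) is the real entity; group(3) is any hex-looking letters the
    # broken SGMLParser glued on before the misplaced ';'. Re-emit them as text.
    radix = 16 if match.group(1) else 10
    return u"%s%s" % (unichr(int(match.group(2), radix)), match.group(3))

def repair_number_entities(data):
    # Up to 5 decimal or 4 hex digits count as the entity; the lazy tail
    # soaks up letters that really belong to the next word.
    p = re.compile(r'&#(x?)([0-9]{,5}|[0-9a-fA-F]{,4})([0-9a-fA-F]*?);')
    return p.sub(_unirepl, data)

print repair_number_entities(u"Don't&#8212e;ver&#8212d;o that&#8212a;gain")
# the em dashes come back and the stray semicolons disappear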
      diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py index 2e223e73..2bf42803 100644 --- a/fanficdownloader/htmlcleanup.py +++ b/fanficdownloader/htmlcleanup.py @@ -32,7 +32,8 @@ def _replaceNumberEntities(data): # entities, including part of the next word if it's a-z. # "Don't—ever—do—that—again," becomes # "Don't—e;ver—d;o—that—a;gain," - p = re.compile(r'&#(x?)([0-9a-fA-F]{,4})([0-9a-fA-F]*);') + # Also need to allow for 5 digit decimal entities 法 + p = re.compile(r'&#(x?)([0-9]{,5}|[0-9a-fA-F]{,4})([0-9a-fA-F]*?);') return p.sub(_unirepl, data) def _replaceNotEntities(data): From 7f7a1a983c754672e3fbd2cbd8208ce37a5659ca Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 19 Apr 2012 12:14:51 -0500 Subject: [PATCH 443/482] Plugin-Fix setting status column with no status available(fictionalley). --- calibre-plugin/ffdl_plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 6be7a618..1c73fc3a 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -793,7 +793,8 @@ make_firstimage_cover:true print("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] - if not meta.startswith('status-') and meta not in book['all_metadata']: + if not meta.startswith('status-') and meta not in book['all_metadata'] or \ + meta.startswith('status-') and 'status' not in book['all_metadata']: print("No value for %s, skipping."%meta) continue if meta not in permitted_values[coldef['datatype']]: From 0c85cf1d102c2001bf74256032e19cf09931454c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 19 Apr 2012 12:18:40 -0500 Subject: [PATCH 444/482] Web- Add list of all downloads feature to web service. --- allrecent.html | 78 +++++++++++++++++++++++++++++++++++++++ app.yaml | 6 +-- fanficdownloader/story.py | 11 +++--- ffstorage.py | 24 ++++++++++++ index.html | 11 ++++-- main.py | 51 +++++++++++++++++++++++++ 6 files changed, 170 insertions(+), 11 deletions(-) create mode 100644 allrecent.html diff --git a/allrecent.html b/allrecent.html new file mode 100644 index 00000000..477b17b7 --- /dev/null +++ b/allrecent.html @@ -0,0 +1,78 @@ + + + + + FanFictionDownLoader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) + + + + +
      +

      + FanFictionDownLoader +

      + + + + + {{yourfile}} + + +
      + {% for fic in fics %} +

      + {{ fic.title }} + by {{ fic.author }} Download Count: {{ fic.count }}
      + Word Count: {{ fic.numWords }} Chapter Count: {{ fic.numChapters }}
      + {% if fic.category %} Categories: {{ fic.category }}
      {% endif %} + {% if fic.genre %} Genres: {{ fic.genre }}
      {% endif %} + {% if fic.language %} Language: {{ fic.language }}
      {% endif %} + {% if fic.series %} Series: {{ fic.series }}
      {% endif %} + {% if fic.characters %} Characters: {{ fic.characters }}
      {% endif %} + {% if fic.status %} Status: {{ fic.status }}
      {% endif %} + {% if fic.datePublished %} Published: {{ fic.datePublished }}
      {% endif %} + {% if fic.dateUpdated %} Last Updated: {{ fic.dateUpdated }}
      {% endif %} + {% if fic.dateCreated %} Last Downloaded: {{ fic.dateCreated }}
      {% endif %} + {% if fic.rating %} Rating: {{ fic.rating }}
      {% endif %} + {% if fic.warnings %} Warnings: {{ fic.warnings }}
      {% endif %} + {% if fic.description %} Summary: {{ fic.description }}
      {% endif %} +

      + {% endfor %} +
      + + + + +
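Editor's note: the {{ fic.* }} lookups in the template above resolve against a small wrapper built in main.py later in this patch: each SavedMeta row's pickled meta dict is flattened onto an object as attributes. From the patch, with comments added:

class FicSlug():
    def __init__(self, savedmeta):
        self.url = savedmeta.url
        self.count = savedmeta.count
        # Every stored metadata key (title, author, numWords, ...) becomes an
        # attribute, so the Django template can write {{ fic.numWords }} etc.
        for k, v in savedmeta.meta.iteritems():
            setattr(self, k, v)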
      + + diff --git a/app.yaml b/app.yaml index 5b75406c..3e09de3b 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-5 +version: 4-4-7 runtime: python27 api_version: 1 threadsafe: true @@ -35,8 +35,8 @@ handlers: - url: /.* script: main.app -builtins: -- datastore_admin: on +#builtins: +#- datastore_admin: on libraries: - name: django diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index c8f06a04..5e94164f 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -218,7 +218,7 @@ class Story: value = re.sub(p,v,value) return value - def getMetadata(self, key, removeallentities=False): + def getMetadata(self, key, removeallentities=False, doreplacements=True): value = None if self.getLists().has_key(key): value = ', '.join(self.getList(key)) @@ -232,21 +232,22 @@ class Story: if key == "datePublished" or key == "dateUpdated": value = value.strftime("%Y-%m-%d") - value=self.doReplacments(value) + if doreplacements: + value=self.doReplacments(value) if removeallentities and value != None: return removeAllEntities(value) else: return value - def getAllMetadata(self, removeallentities=False): + def getAllMetadata(self, removeallentities=False, doreplacements=True): ''' All single value *and* list value metadata as strings. ''' allmetadata = {} for k in self.metadata.keys(): - allmetadata[k] = self.getMetadata(k, removeallentities) + allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements) for l in self.listables.keys(): - allmetadata[l] = self.getMetadata(l, removeallentities) + allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements) return allmetadata diff --git a/ffstorage.py b/ffstorage.py index 92e29d04..bad9b4a4 100644 --- a/ffstorage.py +++ b/ffstorage.py @@ -13,8 +13,23 @@ # limitations under the License. # +import pickle, copy from google.appengine.ext import db +class ObjectProperty(db.Property): + data_type = db.Blob + + def get_value_for_datastore(self, model_instance): + value = self.__get__(model_instance, model_instance.__class__) + pickled_val = pickle.dumps(value,protocol=pickle.HIGHEST_PROTOCOL) + if value is not None: return db.Blob(pickled_val) + + def make_value_from_datastore(self, value): + if value is not None: return pickle.loads(value) + + def default_value(self): + return copy.copy(self.default) + class DownloadMeta(db.Model): user = db.UserProperty() url = db.StringProperty() @@ -37,3 +52,12 @@ class DownloadData(db.Model): class UserConfig(db.Model): user = db.UserProperty() config = db.BlobProperty() + +class SavedMeta(db.Model): + url = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + date = db.DateTimeProperty(auto_now_add=True) + count = db.IntegerProperty() + meta = ObjectProperty() + diff --git a/index.html b/index.html index eb193e75..43ec8f2b 100644 --- a/index.html +++ b/index.html @@ -54,9 +54,11 @@ much easier.
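Editor's note: the ObjectProperty added to ffstorage.py above pickles an arbitrary Python value into a db.Blob on write and unpickles it on read, which is what lets SavedMeta.meta hold a whole metadata dict. Hypothetical usage (field values are made up; this only runs inside the App Engine runtime):

from ffstorage import SavedMeta

meta = {'title': u'Example Story', 'numChapters': u'12'}
s = SavedMeta(url='http://example.com/story.php?no=1',
              title=meta['title'], author=u'Someone', count=1, meta=meta)
s.put()                            # dict is pickled via get_value_for_datastore()
again = SavedMeta.all().filter('url =', s.url).get()
print again.meta['numChapters']    # unpickled transparently -> 12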

      -

      New Site Added

      +

      New Feature Added

- Support for archive.skyehawke.com has been added. Thanks to Ida Leter for implementing this. + You can now see a list of downloaded fanfics by all + users, by most popular + or most recent.
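Editor's note: the two choices above correspond to the same /allrecent handler with and without a query parameter; the main.py hunk later in this patch picks the sort order like so (excerpted from AllRecentFilesServer.get(), comments added):

q = SavedMeta.all()
if self.request.get('bydate'):
    q.order('-date')     # "most recent"
else:
    q.order('-count')    # "most popular"
fics = q.fetch(200)      # cap the listing at 200 stories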

      Questions? Check out our @@ -66,7 +68,7 @@ If you have any problems with this application, please report them in the FanFictionDownLoader Google Group. The - Previous Version is also available for you to use if necessary. + Previous Version is also available for you to use if necessary.

      {{ error_message }} @@ -96,6 +98,9 @@

      Or see your personal list of previously downloaded fanfics.

      +

+ See a list of downloaded fanfics by all users, by most popular or most recent. +

      {% else %} diff --git a/main.py b/main.py index e2d4ab47..d6884ddc 100644 --- a/main.py +++ b/main.py @@ -303,6 +303,39 @@ class RecentFilesServer(webapp2.RequestHandler): path = os.path.join(os.path.dirname(__file__), 'recent.html') self.response.out.write(template.render(path, template_values)) +class AllRecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = SavedMeta.all() + if self.request.get('bydate'): + q.order('-date') + else: + q.order('-count') + + fics = q.fetch(200) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + sendslugs = [] + + for fic in fics: + ficslug = FicSlug(fic) + sendslugs.append(ficslug) + + template_values = dict(fics = sendslugs, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'allrecent.html') + self.response.out.write(template.render(path, template_values)) + +class FicSlug(): + def __init__(self,savedmeta): + self.url = savedmeta.url + self.count = savedmeta.count + for k, v in savedmeta.meta.iteritems(): + setattr(self,k,v) + class FanfictionDownloader(UserConfigServer): def get(self): self.post() @@ -464,6 +497,8 @@ class FanfictionDownloaderTask(UserConfigServer): download.url = adapter.getStory().getMetadata('storyUrl') download.put() + allmeta = adapter.getStory().getAllMetadata(removeallentities=True,doreplacements=False) + outbuffer = StringIO() writer.writeStory(outbuffer) data = outbuffer.getvalue() @@ -495,6 +530,21 @@ class FanfictionDownloaderTask(UserConfigServer): download.completed=True download.put() + smetal = SavedMeta.all().filter('url =', allmeta['storyUrl'] ).fetch(1) + if smetal and smetal[0]: + smeta = smetal[0] + smeta.count += 1 + else: + smeta=SavedMeta() + smeta.count = 1 + + smeta.url = allmeta['storyUrl'] + smeta.title = allmeta['title'] + smeta.author = allmeta['author'] + smeta.meta = allmeta + smeta.date = datetime.datetime.now() + smeta.put() + logging.info("Download finished OK") del data @@ -563,6 +613,7 @@ app = webapp2.WSGIApplication([('/', MainHandler), ('/fdown', FanfictionDownloader), (r'/file.*', FileServer), ('/status', FileStatusServer), + ('/allrecent', AllRecentFilesServer), ('/recent', RecentFilesServer), ('/editconfig', EditConfigServer), ('/clearrecent', ClearRecentServer), From 465e25d3e6a17f117adc05b0aa8348bf8c5b5621 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 19 Apr 2012 12:19:06 -0500 Subject: [PATCH 445/482] Added tag FanFictionDownLoader-4.4.7 for changeset c8ab1f6bbd57 From 0cf5ec9880535990da39a884b4e34f3a00192770 Mon Sep 17 00:00:00 2001 From: althaine Date: Fri, 20 Apr 2012 20:36:20 +1000 Subject: [PATCH 446/482] Updated fimfic.net adapter to use provided API --- allrecent.html | 78 + app.yaml | 46 + calibre-plugin/__init__.py | 90 + calibre-plugin/about.txt | 28 + calibre-plugin/common_utils.py | 447 ++ calibre-plugin/config.py | 589 +++ calibre-plugin/dialogs.py | 663 +++ calibre-plugin/ffdl_plugin.py | 1007 ++++ calibre-plugin/images/icon.png | Bin 0 -> 24649 bytes calibre-plugin/images/icon.xcf | Bin 0 -> 63927 bytes calibre-plugin/jobs.py | 163 + ...mport-name-fanfictiondownloader_plugin.txt | 0 cron.yaml | 10 + css/index.css | 73 + defaults.ini | 508 ++ delete_fic.py | 59 + downloader.py | 202 + editconfig.html | 89 + epubmerge.py | 25 + example.ini | 40 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapters/__init__.py | 106 + 
.../adapters/adapter_adastrafanficcom.py | 228 + .../adapters/adapter_archiveofourownorg.py | 263 + .../adapters/adapter_archiveskyehawkecom.py | 190 + .../adapters/adapter_castlefansorg.py | 310 ++ .../adapters/adapter_fanfictionnet.py | 278 ++ .../adapters/adapter_ficbooknet.py | 222 + .../adapters/adapter_fictionalleyorg.py | 231 + .../adapters/adapter_fictionpresscom.py | 49 + .../adapters/adapter_ficwadcom.py | 217 + .../adapters/adapter_fimfictionnet.py | 168 + .../adapter_harrypotterfanfictioncom.py | 201 + .../adapters/adapter_hpfandomnet.py | 234 + .../adapters/adapter_ksarchivecom.py | 306 ++ .../adapters/adapter_mediaminerorg.py | 235 + .../adapters/adapter_midnightwhispersca.py | 289 ++ .../adapters/adapter_mugglenetcom.py | 331 ++ .../adapters/adapter_nfacommunitycom.py | 289 ++ .../adapters/adapter_portkeyorg.py | 278 ++ .../adapters/adapter_potionsandsnitchesnet.py | 209 + fanficdownloader/adapters/adapter_siyecouk.py | 238 + .../adapters/adapter_tenhawkpresentscom.py | 246 + fanficdownloader/adapters/adapter_test1.py | 199 + .../adapters/adapter_thequidditchpitchorg.py | 293 ++ .../adapter_thewriterscoffeeshopcom.py | 252 + .../adapters/adapter_tthfanficorg.py | 258 + .../adapters/adapter_twilightednet.py | 250 + .../adapters/adapter_twiwritenet.py | 276 ++ .../adapters/adapter_whoficcom.py | 232 + fanficdownloader/adapters/base_adapter.py | 366 ++ fanficdownloader/chardet/__init__.py | 26 + fanficdownloader/chardet/big5freq.py | 923 ++++ fanficdownloader/chardet/big5prober.py | 41 + fanficdownloader/chardet/chardistribution.py | 200 + .../chardet/charsetgroupprober.py | 96 + fanficdownloader/chardet/charsetprober.py | 60 + .../chardet/codingstatemachine.py | 56 + fanficdownloader/chardet/constants.py | 47 + fanficdownloader/chardet/escprober.py | 79 + fanficdownloader/chardet/escsm.py | 240 + fanficdownloader/chardet/eucjpprober.py | 85 + fanficdownloader/chardet/euckrfreq.py | 594 +++ fanficdownloader/chardet/euckrprober.py | 41 + fanficdownloader/chardet/euctwfreq.py | 426 ++ fanficdownloader/chardet/euctwprober.py | 41 + fanficdownloader/chardet/gb2312freq.py | 471 ++ fanficdownloader/chardet/gb2312prober.py | 41 + fanficdownloader/chardet/hebrewprober.py | 269 + fanficdownloader/chardet/jisfreq.py | 567 +++ fanficdownloader/chardet/jpcntx.py | 210 + .../chardet/langbulgarianmodel.py | 228 + fanficdownloader/chardet/langcyrillicmodel.py | 329 ++ fanficdownloader/chardet/langgreekmodel.py | 225 + fanficdownloader/chardet/langhebrewmodel.py | 201 + .../chardet/langhungarianmodel.py | 225 + fanficdownloader/chardet/langthaimodel.py | 200 + fanficdownloader/chardet/latin1prober.py | 136 + fanficdownloader/chardet/mbcharsetprober.py | 82 + fanficdownloader/chardet/mbcsgroupprober.py | 50 + fanficdownloader/chardet/mbcssm.py | 514 ++ fanficdownloader/chardet/sbcharsetprober.py | 106 + fanficdownloader/chardet/sbcsgroupprober.py | 64 + fanficdownloader/chardet/sjisprober.py | 85 + fanficdownloader/chardet/test.py | 20 + fanficdownloader/chardet/universaldetector.py | 154 + fanficdownloader/chardet/utf8prober.py | 76 + fanficdownloader/configurable.py | 77 + fanficdownloader/epubutils.py | 96 + fanficdownloader/exceptions.py | 69 + fanficdownloader/gziphttp.py | 38 + fanficdownloader/html.py | 126 + fanficdownloader/html2text.py | 452 ++ fanficdownloader/htmlcleanup.py | 468 ++ fanficdownloader/mobi.py | 384 ++ fanficdownloader/story.py | 406 ++ fanficdownloader/translit.py | 57 + fanficdownloader/writers/__init__.py | 38 + fanficdownloader/writers/base_writer.py | 281 ++ 
fanficdownloader/writers/writer_epub.py | 478 ++ fanficdownloader/writers/writer_html.py | 103 + fanficdownloader/writers/writer_mobi.py | 202 + fanficdownloader/writers/writer_txt.py | 157 + ffstorage.py | 63 + index-ajax.html | 109 + index.html | 352 ++ index.yaml | 33 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ login.html | 110 + main.py | 621 +++ makeplugin.py | 38 + makezip.py | 54 + plugin-defaults.ini | 477 ++ plugin-example.ini | 97 + queue.yaml | 7 + readme.txt | 19 + recent.html | 85 + settings.py | 25 + simplejson/__init__.py | 318 ++ simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/encoder.py | 440 ++ simplejson/scanner.py | 65 + simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes status.html | 94 + utils/__init__.py | 1 + utils/remover.py | 109 + utils/tally.py | 64 + 148 files changed, 35078 insertions(+) create mode 100644 allrecent.html create mode 100644 app.yaml create mode 100644 calibre-plugin/__init__.py create mode 100644 calibre-plugin/about.txt create mode 100644 calibre-plugin/common_utils.py create mode 100644 calibre-plugin/config.py create mode 100644 calibre-plugin/dialogs.py create mode 100644 calibre-plugin/ffdl_plugin.py create mode 100644 calibre-plugin/images/icon.png create mode 100644 calibre-plugin/images/icon.xcf create mode 100644 calibre-plugin/jobs.py create mode 100644 calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 defaults.ini create mode 100644 delete_fic.py create mode 100644 downloader.py create mode 100644 editconfig.html create mode 100644 epubmerge.py create mode 100644 example.ini create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapters/__init__.py create mode 100644 fanficdownloader/adapters/adapter_adastrafanficcom.py create mode 100644 fanficdownloader/adapters/adapter_archiveofourownorg.py create mode 100644 fanficdownloader/adapters/adapter_archiveskyehawkecom.py create mode 100644 fanficdownloader/adapters/adapter_castlefansorg.py create mode 100644 fanficdownloader/adapters/adapter_fanfictionnet.py create mode 100644 fanficdownloader/adapters/adapter_ficbooknet.py create mode 100644 fanficdownloader/adapters/adapter_fictionalleyorg.py create mode 100644 fanficdownloader/adapters/adapter_fictionpresscom.py create mode 100644 fanficdownloader/adapters/adapter_ficwadcom.py create mode 100644 fanficdownloader/adapters/adapter_fimfictionnet.py create mode 100644 fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py create mode 100644 fanficdownloader/adapters/adapter_hpfandomnet.py create mode 100644 fanficdownloader/adapters/adapter_ksarchivecom.py create mode 100644 
fanficdownloader/adapters/adapter_mediaminerorg.py create mode 100644 fanficdownloader/adapters/adapter_midnightwhispersca.py create mode 100644 fanficdownloader/adapters/adapter_mugglenetcom.py create mode 100644 fanficdownloader/adapters/adapter_nfacommunitycom.py create mode 100644 fanficdownloader/adapters/adapter_portkeyorg.py create mode 100644 fanficdownloader/adapters/adapter_potionsandsnitchesnet.py create mode 100644 fanficdownloader/adapters/adapter_siyecouk.py create mode 100644 fanficdownloader/adapters/adapter_tenhawkpresentscom.py create mode 100644 fanficdownloader/adapters/adapter_test1.py create mode 100644 fanficdownloader/adapters/adapter_thequidditchpitchorg.py create mode 100644 fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py create mode 100644 fanficdownloader/adapters/adapter_tthfanficorg.py create mode 100644 fanficdownloader/adapters/adapter_twilightednet.py create mode 100644 fanficdownloader/adapters/adapter_twiwritenet.py create mode 100644 fanficdownloader/adapters/adapter_whoficcom.py create mode 100644 fanficdownloader/adapters/base_adapter.py create mode 100644 fanficdownloader/chardet/__init__.py create mode 100644 fanficdownloader/chardet/big5freq.py create mode 100644 fanficdownloader/chardet/big5prober.py create mode 100644 fanficdownloader/chardet/chardistribution.py create mode 100644 fanficdownloader/chardet/charsetgroupprober.py create mode 100644 fanficdownloader/chardet/charsetprober.py create mode 100644 fanficdownloader/chardet/codingstatemachine.py create mode 100644 fanficdownloader/chardet/constants.py create mode 100644 fanficdownloader/chardet/escprober.py create mode 100644 fanficdownloader/chardet/escsm.py create mode 100644 fanficdownloader/chardet/eucjpprober.py create mode 100644 fanficdownloader/chardet/euckrfreq.py create mode 100644 fanficdownloader/chardet/euckrprober.py create mode 100644 fanficdownloader/chardet/euctwfreq.py create mode 100644 fanficdownloader/chardet/euctwprober.py create mode 100644 fanficdownloader/chardet/gb2312freq.py create mode 100644 fanficdownloader/chardet/gb2312prober.py create mode 100644 fanficdownloader/chardet/hebrewprober.py create mode 100644 fanficdownloader/chardet/jisfreq.py create mode 100644 fanficdownloader/chardet/jpcntx.py create mode 100644 fanficdownloader/chardet/langbulgarianmodel.py create mode 100644 fanficdownloader/chardet/langcyrillicmodel.py create mode 100644 fanficdownloader/chardet/langgreekmodel.py create mode 100644 fanficdownloader/chardet/langhebrewmodel.py create mode 100644 fanficdownloader/chardet/langhungarianmodel.py create mode 100644 fanficdownloader/chardet/langthaimodel.py create mode 100644 fanficdownloader/chardet/latin1prober.py create mode 100644 fanficdownloader/chardet/mbcharsetprober.py create mode 100644 fanficdownloader/chardet/mbcsgroupprober.py create mode 100644 fanficdownloader/chardet/mbcssm.py create mode 100644 fanficdownloader/chardet/sbcharsetprober.py create mode 100644 fanficdownloader/chardet/sbcsgroupprober.py create mode 100644 fanficdownloader/chardet/sjisprober.py create mode 100644 fanficdownloader/chardet/test.py create mode 100644 fanficdownloader/chardet/universaldetector.py create mode 100644 fanficdownloader/chardet/utf8prober.py create mode 100644 fanficdownloader/configurable.py create mode 100644 fanficdownloader/epubutils.py create mode 100644 fanficdownloader/exceptions.py create mode 100644 fanficdownloader/gziphttp.py create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/html2text.py 
create mode 100644 fanficdownloader/htmlcleanup.py create mode 100644 fanficdownloader/mobi.py create mode 100644 fanficdownloader/story.py create mode 100644 fanficdownloader/translit.py create mode 100644 fanficdownloader/writers/__init__.py create mode 100644 fanficdownloader/writers/base_writer.py create mode 100644 fanficdownloader/writers/writer_epub.py create mode 100644 fanficdownloader/writers/writer_html.py create mode 100644 fanficdownloader/writers/writer_mobi.py create mode 100644 fanficdownloader/writers/writer_txt.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 login.html create mode 100644 main.py create mode 100644 makeplugin.py create mode 100644 makezip.py create mode 100644 plugin-defaults.ini create mode 100644 plugin-example.ini create mode 100644 queue.yaml create mode 100644 readme.txt create mode 100644 recent.html create mode 100644 settings.py create mode 100644 simplejson/__init__.py create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/encoder.py create mode 100644 simplejson/scanner.py create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 status.html create mode 100644 utils/__init__.py create mode 100644 utils/remover.py create mode 100644 utils/tally.py diff --git a/allrecent.html b/allrecent.html new file mode 100644 index 00000000..477b17b7 --- /dev/null +++ b/allrecent.html @@ -0,0 +1,78 @@ + + + + + FanFictionDownLoader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) + + + + +
      +

      + FanFictionDownLoader +

      + + + + + {{yourfile}} + + +
      + {% for fic in fics %} +

      + {{ fic.title }} + by {{ fic.author }} Download Count: {{ fic.count }}
      + Word Count: {{ fic.numWords }} Chapter Count: {{ fic.numChapters }}
      + {% if fic.category %} Categories: {{ fic.category }}
      {% endif %} + {% if fic.genre %} Genres: {{ fic.genre }}
      {% endif %} + {% if fic.language %} Language: {{ fic.language }}
      {% endif %} + {% if fic.series %} Series: {{ fic.series }}
      {% endif %} + {% if fic.characters %} Characters: {{ fic.characters }}
      {% endif %} + {% if fic.status %} Status: {{ fic.status }}
      {% endif %} + {% if fic.datePublished %} Published: {{ fic.datePublished }}
      {% endif %} + {% if fic.dateUpdated %} Last Updated: {{ fic.dateUpdated }}
      {% endif %} + {% if fic.dateCreated %} Last Downloaded: {{ fic.dateCreated }}
      {% endif %} + {% if fic.rating %} Rating: {{ fic.rating }}
      {% endif %} + {% if fic.warnings %} Warnings: {{ fic.warnings }}
      {% endif %} + {% if fic.description %} Summary: {{ fic.description }}
      {% endif %} +

      + {% endfor %} +
      + + + + +
      + + diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..3e09de3b --- /dev/null +++ b/app.yaml @@ -0,0 +1,46 @@ +# ffd-retief-hrd fanfictiondownloader +application: fanfictiondownloader +version: 4-4-7 +runtime: python27 +api_version: 1 +threadsafe: true + +handlers: + +- url: /r3m0v3r.* + script: utils.remover.app + login: admin + +- url: /tally.* + script: utils.tally.app + login: admin + +- url: /fdownloadtask + script: main.app + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /favicon\.ico + static_files: static/favicon.ico + upload: static/favicon\.ico + +- url: /.* + script: main.app + +#builtins: +#- datastore_admin: on + +libraries: +- name: django + version: "1.2" + +- name: PIL + version: "1.1.7" diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py new file mode 100644 index 00000000..8a075343 --- /dev/null +++ b/calibre-plugin/__init__.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +# The class that all Interface Action plugin wrappers must inherit from +from calibre.customize import InterfaceActionBase + +## Apparently the name for this class doesn't matter--it was still +## 'demo' for the first few versions. +class FanFictionDownLoaderBase(InterfaceActionBase): + ''' + This class is a simple wrapper that provides information about the + actual plugin class. The actual interface plugin class is called + InterfacePlugin and is defined in the ffdl_plugin.py file, as + specified in the actual_plugin field below. + + The reason for having two classes is that it allows the command line + calibre utilities to run without needing to load the GUI libraries. + ''' + name = 'FanFictionDownLoader' + description = 'UI plugin to download FanFiction stories from various sites.' + supported_platforms = ['windows', 'osx', 'linux'] + author = 'Jim Miller' + version = (1, 5, 14) + minimum_calibre_version = (0, 8, 30) + + #: This field defines the GUI plugin class that contains all the code + #: that actually does something. Its format is module_path:class_name + #: The specified class must be defined in the specified module. + actual_plugin = 'calibre_plugins.fanfictiondownloader_plugin.ffdl_plugin:FanFictionDownLoaderPlugin' + + def is_customizable(self): + ''' + This method must return True to enable customization via + Preferences->Plugins + ''' + return True + + def config_widget(self): + ''' + Implement this method and :meth:`save_settings` in your plugin to + use a custom configuration dialog. + + This method, if implemented, must return a QWidget. The widget can have + an optional method validate() that takes no arguments and is called + immediately after the user clicks OK. Changes are applied if and only + if the method returns True. + + If for some reason you cannot perform the configuration at this time, + return a tuple of two strings (message, details), these will be + displayed as a warning dialog to the user and the process will be + aborted. + + The base class implementation of this method raises NotImplementedError + so by default no user configuration is possible. 
+ ''' + # It is important to put this import statement here rather than at the + # top of the module as importing the config class will also cause the + # GUI libraries to be loaded, which we do not want when using calibre + # from the command line + from calibre_plugins.fanfictiondownloader_plugin.config import ConfigWidget + return ConfigWidget(self.actual_plugin_) + + def save_settings(self, config_widget): + ''' + Save the settings specified by the user with config_widget. + + :param config_widget: The widget returned by :meth:`config_widget`. + ''' + config_widget.save_settings() + + # Apply the changes + ac = self.actual_plugin_ + if ac is not None: + ac.apply_settings() + +# For testing, run from command line with this: +# calibre-debug -e __init__.py +# +if __name__ == '__main__': + from PyQt4.Qt import QApplication + from calibre.gui2.preferences import test_widget + app = QApplication([]) + test_widget('Advanced', 'Plugins') diff --git a/calibre-plugin/about.txt b/calibre-plugin/about.txt new file mode 100644 index 00000000..9174f8b1 --- /dev/null +++ b/calibre-plugin/about.txt @@ -0,0 +1,28 @@ +
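Editor's note: the docstrings above spell out calibre's two-class plugin pattern: a lightweight InterfaceActionBase wrapper names its GUI class as a string, so command-line calibre never imports the Qt libraries. In outline (a sketch with hypothetical names, not this plugin's real module path):

from calibre.customize import InterfaceActionBase

class ExampleDownloaderBase(InterfaceActionBase):
    name                    = 'ExampleDownloader'   # hypothetical plugin name
    supported_platforms     = ['windows', 'osx', 'linux']
    minimum_calibre_version = (0, 8, 30)
    # A string, not an import: the module named after 'calibre_plugins.' is
    # only loaded when the GUI actually instantiates the plugin.
    actual_plugin = 'calibre_plugins.example_downloader.ui:ExampleDownloaderPlugin'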
      + +

      Created by Jim Miller, borrowing heavily from Grant Drake's +'Reading List', +'Extract ISBN' and +'Count Pages' +plugins.

      + +

      +Calibre officially distributes plugins from the mobileread.com forum site. +The official distro channel for this plugin is there: FanFictionDownLoader +

      + +

      I also monitor the +general users +group for the downloader. That covers the web application and CLI, too. +

+ +The source for this plugin is available at its +project home. +
      + +

      +See the list of supported sites. +

      +

      +Read the FAQs. +

      diff --git a/calibre-plugin/common_utils.py b/calibre-plugin/common_utils.py new file mode 100644 index 00000000..19e8697e --- /dev/null +++ b/calibre-plugin/common_utils.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +import os +from PyQt4 import QtGui +from PyQt4.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime) +from calibre.constants import iswindows +from calibre.gui2 import gprefs, error_dialog, UNDEFINED_QDATETIME +from calibre.gui2.actions import menu_action_unique_name +from calibre.gui2.keyboard import ShortcutConfig +from calibre.utils.config import config_dir +from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE + +# Global definition of our plugin name. Used for common functions that require this. +plugin_name = None +# Global definition of our plugin resources. Used to share between the xxxAction and xxxBase +# classes if you need any zip images to be displayed on the configuration dialog. +plugin_icon_resources = {} + + +def set_plugin_icon_resources(name, resources): + ''' + Set our global store of plugin name and icon resources for sharing between + the InterfaceAction class which reads them and the ConfigWidget + if needed for use on the customization dialog for this plugin. + ''' + global plugin_icon_resources, plugin_name + plugin_name = name + plugin_icon_resources = resources + + +def get_icon(icon_name): + ''' + Retrieve a QIcon for the named image from the zip file if it exists, + or if not then from Calibre's image cache. 
+ ''' + if icon_name: + pixmap = get_pixmap(icon_name) + if pixmap is None: + # Look in Calibre's cache for the icon + return QIcon(I(icon_name)) + else: + return QIcon(pixmap) + return QIcon() + + +def get_pixmap(icon_name): + ''' + Retrieve a QPixmap for the named image + Any icons belonging to the plugin must be prefixed with 'images/' + ''' + global plugin_icon_resources, plugin_name + + if not icon_name.startswith('images/'): + # We know this is definitely not an icon belonging to this plugin + pixmap = QPixmap() + pixmap.load(I(icon_name)) + return pixmap + + # Check to see whether the icon exists as a Calibre resource + # This will enable skinning if the user stores icons within a folder like: + # ...\AppData\Roaming\calibre\resources\images\Plugin Name\ + if plugin_name: + local_images_dir = get_local_images_dir(plugin_name) + local_image_path = os.path.join(local_images_dir, icon_name.replace('images/', '')) + if os.path.exists(local_image_path): + pixmap = QPixmap() + pixmap.load(local_image_path) + return pixmap + + # As we did not find an icon elsewhere, look within our zip resources + if icon_name in plugin_icon_resources: + pixmap = QPixmap() + pixmap.loadFromData(plugin_icon_resources[icon_name]) + return pixmap + return None + + +def get_local_images_dir(subfolder=None): + ''' + Returns a path to the user's local resources/images folder + If a subfolder name parameter is specified, appends this to the path + ''' + images_dir = os.path.join(config_dir, 'resources/images') + if subfolder: + images_dir = os.path.join(images_dir, subfolder) + if iswindows: + images_dir = os.path.normpath(images_dir) + return images_dir + + +def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=(), triggered=None, is_checked=None): + ''' + Create a menu action with the specified criteria and action + Note that if no shortcut is specified, will not appear in Preferences->Keyboard + This method should only be used for actions which either have no shortcuts, + or register their menus only once. Use create_menu_action_unique for all else. 
+ ''' + if shortcut is not None: + if len(shortcut) == 0: + shortcut = () + else: + shortcut = _(shortcut) + ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut), + attr=menu_text) + if image: + ac.setIcon(get_icon(image)) + if triggered is not None: + ac.triggered.connect(triggered) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + + parent_menu.addAction(ac) + return ac + + +def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ''' + Create a menu action with the specified criteria and action, using the new + InterfaceAction.create_menu_action() function which ensures that regardless of + whether a shortcut is specified it will appear in Preferences->Keyboard + ''' + orig_shortcut = shortcut + kb = ia.gui.keyboard + if unique_name is None: + unique_name = menu_text + if not shortcut == False: + full_unique_name = menu_action_unique_name(ia, unique_name) + if full_unique_name in kb.shortcuts: + shortcut = False + else: + if shortcut is not None and not shortcut == False: + if len(shortcut) == 0: + shortcut = None + else: + shortcut = _(shortcut) + + if shortcut_name is None: + shortcut_name = menu_text.replace('&','') + + ac = ia.create_menu_action(parent_menu, unique_name, menu_text, icon=None, shortcut=shortcut, + description=tooltip, triggered=triggered, shortcut_name=shortcut_name) + if shortcut == False and not orig_shortcut == False: + if ac.calibre_shortcut_unique_name in ia.gui.keyboard.shortcuts: + kb.replace_action(ac.calibre_shortcut_unique_name, ac) + if image: + ac.setIcon(get_icon(image)) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + return ac + + +def swap_author_names(author): + if author.find(',') == -1: + return author + name_parts = author.strip().partition(',') + return name_parts[2].strip() + ' ' + name_parts[0] + + +def get_library_uuid(db): + try: + library_uuid = db.library_id + except: + library_uuid = '' + return library_uuid + + +class ImageLabel(QLabel): + + def __init__(self, parent, icon_name, size=16): + QLabel.__init__(self, parent) + pixmap = get_pixmap(icon_name) + self.setPixmap(pixmap) + self.setMaximumSize(size, size) + self.setScaledContents(True) + + +class ImageTitleLayout(QHBoxLayout): + ''' + A reusable layout widget displaying an image followed by a title + ''' + def __init__(self, parent, icon_name, title): + QHBoxLayout.__init__(self) + title_image_label = QLabel(parent) + pixmap = get_pixmap(icon_name) + if pixmap is None: + pixmap = get_pixmap('library.png') + # error_dialog(parent, _('Restart required'), + # _('You must restart Calibre before using this plugin!'), show=True) + else: + title_image_label.setPixmap(pixmap) + title_image_label.setMaximumSize(32, 32) + title_image_label.setScaledContents(True) + self.addWidget(title_image_label) + + title_font = QFont() + title_font.setPointSize(16) + shelf_label = QLabel(title, parent) + shelf_label.setFont(title_font) + self.addWidget(shelf_label) + self.insertStretch(-1) + + +class SizePersistedDialog(QDialog): + ''' + This dialog is a base class for any dialogs that want their size/position + restored when they are next opened. 
+ ''' + def __init__(self, parent, unique_pref_name): + QDialog.__init__(self, parent) + self.unique_pref_name = unique_pref_name + self.geom = gprefs.get(unique_pref_name, None) + self.finished.connect(self.dialog_closing) + + def resize_dialog(self): + if self.geom is None: + self.resize(self.sizeHint()) + else: + self.restoreGeometry(self.geom) + + def dialog_closing(self, result): + geom = bytearray(self.saveGeometry()) + gprefs[self.unique_pref_name] = geom + + +class ReadOnlyTableWidgetItem(QTableWidgetItem): + + def __init__(self, text): + if text is None: + text = '' + QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class RatingTableWidgetItem(QTableWidgetItem): + + def __init__(self, rating, is_read_only=False): + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, rating) + if is_read_only: + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class DateTableWidgetItem(QTableWidgetItem): + + def __init__(self, date_read, is_read_only=False, default_to_today=False): + if date_read == UNDEFINED_DATE and default_to_today: + date_read = now() + if is_read_only: + QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + else: + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, QDateTime(date_read)) + + +class NoWheelComboBox(QComboBox): + + def wheelEvent (self, event): + # Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid + event.ignore() + + +class CheckableTableWidgetItem(QTableWidgetItem): + + def __init__(self, checked=False, is_tristate=False): + QTableWidgetItem.__init__(self, '') + self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled )) + if is_tristate: + self.setFlags(self.flags() | Qt.ItemIsTristate) + if checked: + self.setCheckState(Qt.Checked) + else: + if is_tristate and checked is None: + self.setCheckState(Qt.PartiallyChecked) + else: + self.setCheckState(Qt.Unchecked) + + def get_boolean_value(self): + ''' + Return a boolean value indicating whether checkbox is checked + If this is a tristate checkbox, a partially checked value is returned as None + ''' + if self.checkState() == Qt.PartiallyChecked: + return None + else: + return self.checkState() == Qt.Checked + + +class TextIconWidgetItem(QTableWidgetItem): + + def __init__(self, text, icon): + QTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem): + + def __init__(self, text, icon): + ReadOnlyTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyLineEdit(QLineEdit): + + def __init__(self, text, parent): + if text is None: + text = '' + QLineEdit.__init__(self, text, parent) + self.setEnabled(False) + + +class KeyValueComboBox(QComboBox): + + def __init__(self, parent, values, selected_key): + QComboBox.__init__(self, parent) + self.values = values + self.populate_combo(selected_key) + + def populate_combo(self, selected_key): + self.clear() + selected_idx = idx = -1 + for key, value in self.values.iteritems(): + idx = idx + 1 + self.addItem(value) + if key == selected_key: + selected_idx = idx + self.setCurrentIndex(selected_idx) + + def selected_key(self): + for key, value in self.values.iteritems(): + if value == unicode(self.currentText()).strip(): 
+ return key + + +class CustomColumnComboBox(QComboBox): + + def __init__(self, parent, custom_columns, selected_column, initial_items=['']): + QComboBox.__init__(self, parent) + self.populate_combo(custom_columns, selected_column, initial_items) + + def populate_combo(self, custom_columns, selected_column, initial_items=['']): + self.clear() + self.column_names = initial_items + if len(initial_items) > 0: + self.addItems(initial_items) + selected_idx = 0 + for idx, value in enumerate(initial_items): + if value == selected_column: + selected_idx = idx + for key in sorted(custom_columns.keys()): + self.column_names.append(key) + self.addItem('%s (%s)'%(key, custom_columns[key]['name'])) + if key == selected_column: + selected_idx = len(self.column_names) - 1 + self.setCurrentIndex(selected_idx) + + def get_selected_column(self): + return self.column_names[self.currentIndex()] + + +class KeyboardConfigDialog(SizePersistedDialog): + ''' + This dialog is used to allow editing of keyboard shortcuts. + ''' + def __init__(self, gui, group_name): + SizePersistedDialog.__init__(self, gui, 'Keyboard shortcut dialog') + self.gui = gui + self.setWindowTitle('Keyboard shortcuts') + layout = QVBoxLayout(self) + self.setLayout(layout) + + self.keyboard_widget = ShortcutConfig(self) + layout.addWidget(self.keyboard_widget) + self.group_name = group_name + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.commit) + button_box.rejected.connect(self.reject) + layout.addWidget(button_box) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.initialize() + + def initialize(self): + self.keyboard_widget.initialize(self.gui.keyboard) + self.keyboard_widget.highlight_group(self.group_name) + + def commit(self): + self.keyboard_widget.commit() + self.accept() + + +class DateDelegate(QStyledItemDelegate): + ''' + Delegate for dates. Because this delegate stores the + format as an instance variable, a new instance must be created for each + column. This differs from all the other delegates. 
+ ''' + def __init__(self, parent): + QStyledItemDelegate.__init__(self, parent) + self.format = 'dd MMM yyyy' + + def displayText(self, val, locale): + d = val.toDateTime() + if d <= UNDEFINED_QDATETIME: + return '' + return format_date(qt_to_dt(d, as_utc=False), self.format) + + def createEditor(self, parent, option, index): + qde = QStyledItemDelegate.createEditor(self, parent, option, index) + qde.setDisplayFormat(self.format) + qde.setMinimumDateTime(UNDEFINED_QDATETIME) + qde.setSpecialValueText(_('Undefined')) + qde.setCalendarPopup(True) + return qde + + def setEditorData(self, editor, index): + val = index.model().data(index, Qt.DisplayRole).toDateTime() + if val is None or val == UNDEFINED_QDATETIME: + val = now() + editor.setDateTime(val) + + def setModelData(self, editor, model, index): + val = editor.dateTime() + if val <= UNDEFINED_QDATETIME: + model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole) + else: + model.setData(index, QDateTime(val), Qt.EditRole) diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py new file mode 100644 index 00000000..5d33643b --- /dev/null +++ b/calibre-plugin/config.py @@ -0,0 +1,589 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import traceback, copy + +from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QFont, + QTextEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QVariant) + +from calibre.gui2 import dynamic, info_dialog +from calibre.utils.config import JSONConfig +from calibre.gui2.ui import get_gui + +from calibre_plugins.fanfictiondownloader_plugin.dialogs \ + import (SKIP, ADDNEW, UPDATE, UPDATEALWAYS, OVERWRITE, OVERWRITEALWAYS, + CALIBREONLY,collision_order) + +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import ( get_library_uuid, KeyboardConfigDialog ) + +from calibre.gui2.complete import MultiCompleteLineEdit + +# This is where all preferences for this plugin will be stored +# Remember that this name (i.e. plugins/fanfictiondownloader_plugin) is also +# in a global namespace, so make it as unique as possible. +# You should always prefix your config file name with plugins/, +# so as to ensure you dont accidentally clobber a calibre config file +all_prefs = JSONConfig('plugins/fanfictiondownloader_plugin') + +# Set defaults used by all. Library specific settings continue to +# take from here. +all_prefs.defaults['personal.ini'] = get_resources('plugin-example.ini') + +all_prefs.defaults['updatemeta'] = True +all_prefs.defaults['updatecover'] = False +all_prefs.defaults['keeptags'] = False +all_prefs.defaults['urlsfromclip'] = True +all_prefs.defaults['updatedefault'] = True +all_prefs.defaults['fileform'] = 'epub' +all_prefs.defaults['collision'] = OVERWRITE +all_prefs.defaults['deleteotherforms'] = False +all_prefs.defaults['adddialogstaysontop'] = False +all_prefs.defaults['includeimages'] = False + +all_prefs.defaults['send_lists'] = '' +all_prefs.defaults['read_lists'] = '' +all_prefs.defaults['addtolists'] = False +all_prefs.defaults['addtoreadlists'] = False +all_prefs.defaults['addtolistsonread'] = False + +all_prefs.defaults['custom_cols'] = {} + +# The list of settings to copy from all_prefs or the previous library +# when config is called for the first time on a library. 
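Editor's note: the PrefsFacade class below leans on two behaviours that are easy to miss: calibre's JSONConfig acts like a dict whose lookups fall through to its .defaults mapping, and the copylist that follows names the settings seeded into a library the plugin has not seen before. A minimal sketch of the fall-through, assuming only the dict-like JSONConfig API used above (the 'plugins/demo_plugin' namespace is hypothetical):

    from calibre.utils.config import JSONConfig

    demo_prefs = JSONConfig('plugins/demo_plugin')  # hypothetical namespace
    demo_prefs.defaults['fileform'] = 'epub'

    # A key that was never assigned resolves to its default...
    assert demo_prefs['fileform'] == 'epub'
    # ...until it is assigned, which persists the value to the JSON file.
    demo_prefs['fileform'] = 'mobi'
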
+copylist = ['personal.ini', + 'updatemeta', + 'updatecover', + 'keeptags', + 'urlsfromclip', + 'updatedefault', + 'fileform', + 'collision', + 'deleteotherforms', + 'adddialogstaysontop', + 'includeimages'] + +# fake out so I don't have to change the prefs calls anywhere. The +# Java programmer in me is offended by op-overloading, but it's very +# tidy. +class PrefsFacade(): + def __init__(self,all_prefs): + self.all_prefs = all_prefs + self.lastlibid = None + + def _get_copylist_prefs(self,frompref): + return filter( lambda x : x[0] in copylist, frompref.items() ) + + def _get_prefs(self): + libraryid = get_library_uuid(get_gui().current_db) + if libraryid not in self.all_prefs: + if self.lastlibid == None: + self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs)) + else: + self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs[self.lastlibid])) + self.lastlibid = libraryid + + return self.all_prefs[libraryid] + + def _save_prefs(self,prefs): + libraryid = get_library_uuid(get_gui().current_db) + self.all_prefs[libraryid] = prefs + + def __getitem__(self,k): + prefs = self._get_prefs() + if k not in prefs: + # pulls from all_prefs.defaults automatically if not set + # in all_prefs + return self.all_prefs[k] + return prefs[k] + + def __setitem__(self,k,v): + prefs = self._get_prefs() + prefs[k]=v + self._save_prefs(prefs) + + # to be avoided--can cause unexpected results as possibly ancient + # all_pref settings may be pulled. + def __delitem__(self,k): + prefs = self._get_prefs() + del prefs[k] + self._save_prefs(prefs) + +prefs = PrefsFacade(all_prefs) + +class ConfigWidget(QWidget): + + def __init__(self, plugin_action): + QWidget.__init__(self) + self.plugin_action = plugin_action + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('List of Supported Sites -- FAQs') + label.setOpenExternalLinks(True) + self.l.addWidget(label) + + tab_widget = QTabWidget(self) + self.l.addWidget(tab_widget) + + self.basic_tab = BasicTab(self, plugin_action) + tab_widget.addTab(self.basic_tab, 'Basic') + + self.personalini_tab = PersonalIniTab(self, plugin_action) + tab_widget.addTab(self.personalini_tab, 'personal.ini') + + self.list_tab = ListTab(self, plugin_action) + tab_widget.addTab(self.list_tab, 'Reading Lists') + if 'Reading List' not in plugin_action.gui.iactions: + self.list_tab.setEnabled(False) + + self.columns_tab = ColumnsTab(self, plugin_action) + tab_widget.addTab(self.columns_tab, 'Custom Columns') + + self.other_tab = OtherTab(self, plugin_action) + tab_widget.addTab(self.other_tab, 'Other') + + + def save_settings(self): + + # basic + prefs['fileform'] = unicode(self.basic_tab.fileform.currentText()) + prefs['collision'] = unicode(self.basic_tab.collision.currentText()) + prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked() + prefs['updatecover'] = self.basic_tab.updatecover.isChecked() + prefs['keeptags'] = self.basic_tab.keeptags.isChecked() + prefs['urlsfromclip'] = self.basic_tab.urlsfromclip.isChecked() + prefs['updatedefault'] = self.basic_tab.updatedefault.isChecked() + prefs['deleteotherforms'] = self.basic_tab.deleteotherforms.isChecked() + prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked() + prefs['includeimages'] = self.basic_tab.includeimages.isChecked() + + if self.list_tab: + # lists + prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.list_tab.send_lists_box.text()).split(',')))) + prefs['read_lists'] = ', '.join(map( lambda 
x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.list_tab.read_lists_box.text()).split(',')))) + # print("send_lists: %s"%prefs['send_lists']) + # print("read_lists: %s"%prefs['read_lists']) + prefs['addtolists'] = self.list_tab.addtolists.isChecked() + prefs['addtoreadlists'] = self.list_tab.addtoreadlists.isChecked() + prefs['addtolistsonread'] = self.list_tab.addtolistsonread.isChecked() + + # personal.ini + ini = unicode(self.personalini_tab.ini.toPlainText()) + if ini: + prefs['personal.ini'] = ini + else: + # if they've removed everything, reset to default. + prefs['personal.ini'] = get_resources('plugin-example.ini') + + # Custom Columns tab + colsmap = {} + for (col,combo) in self.columns_tab.custcol_dropdowns.iteritems(): + val = unicode(combo.itemData(combo.currentIndex()).toString()) + if val != 'none': + colsmap[col] = val + #print("colsmap[%s]:%s"%(col,colsmap[col])) + prefs['custom_cols'] = colsmap + + def edit_shortcuts(self): + self.save_settings() + # Force the menus to be rebuilt immediately, so we have all our actions registered + self.plugin_action.rebuild_menus() + d = KeyboardConfigDialog(self.plugin_action.gui, self.plugin_action.action_spec[0]) + if d.exec_() == d.Accepted: + self.plugin_action.gui.keyboard.finalize() + +class BasicTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('These settings control the basic features of the plugin--downloading FanFiction.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + horz = QHBoxLayout() + label = QLabel('Default Output &Format:') + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + horz = QHBoxLayout() + label = QLabel('Default If Story Already Exists?') + label.setToolTip("What to do if there's already an existing story with the same title and author.") + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip('Overwrite will replace the existing story. Add New will create a new story with the same title and author.') + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox('Default Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update title, author, URL, tags, custom columns, etc for story in Calibre from web site.') + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + self.updatecover = QCheckBox('Update Cover when Updating Metadata?',self) + self.updatecover.setToolTip("Update cover image from EPUB when metadata is updated. 
(EPUB only.)\nDoesn't go looking for new images on 'Update Calibre Metadata Only'.")
+ self.updatecover.setChecked(prefs['updatecover'])
+ self.l.addWidget(self.updatecover)
+
+ self.keeptags = QCheckBox('Keep Existing Tags when Updating Metadata?',self)
+ self.keeptags.setToolTip('Existing tags will be kept and any new tags added.\nCompleted and In-Progress tags will still be updated, if known.\nLast Updated tags will be updated if lastupdate is in include_subject_tags.')
+ self.keeptags.setChecked(prefs['keeptags'])
+ self.l.addWidget(self.keeptags)
+
+ self.urlsfromclip = QCheckBox('Take URLs from Clipboard?',self)
+ self.urlsfromclip.setToolTip('Prefill URLs from valid URLs in Clipboard when Adding New.')
+ self.urlsfromclip.setChecked(prefs['urlsfromclip'])
+ self.l.addWidget(self.urlsfromclip)
+
+ self.updatedefault = QCheckBox('Default to Update when books selected?',self)
+ self.updatedefault.setToolTip('The top FanFictionDownLoader plugin button will start Update if\n'+
+ 'books are selected. If unchecked, it will always bring up \'Add New\'.')
+ self.updatedefault.setChecked(prefs['updatedefault'])
+ self.l.addWidget(self.updatedefault)
+
+ self.deleteotherforms = QCheckBox('Delete other existing formats?',self)
+ self.deleteotherforms.setToolTip('Check this to automatically delete all other ebook formats when updating an existing book.\nHandy if you have both a Nook (epub) and Kindle (mobi), for example.')
+ self.deleteotherforms.setChecked(prefs['deleteotherforms'])
+ self.l.addWidget(self.deleteotherforms)
+
+ self.adddialogstaysontop = QCheckBox("Keep 'Add New from URL(s)' dialog on top?",self)
+ self.adddialogstaysontop.setToolTip("Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\ndialog on top of all other windows. Useful for dragging URLs onto it.")
+ self.adddialogstaysontop.setChecked(prefs['adddialogstaysontop'])
+ self.l.addWidget(self.adddialogstaysontop)
+
+ # this is a cheat to make it easier for users to realize there's a new include_images feature.
+ self.includeimages = QCheckBox("Include images in EPUBs?",self)
+ self.includeimages.setToolTip("Download and include images in EPUB stories. This is equivalent to adding:\n\n[epub]\ninclude_images:true\nkeep_summary_html:true\nmake_firstimage_cover:true\n\n ...to the top of personal.ini. 
Your settings in personal.ini will override this.") + self.includeimages.setChecked(prefs['includeimages']) + self.l.addWidget(self.includeimages) + + self.l.insertStretch(-1) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class PersonalIniTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('These settings provide more detailed control over what metadata will be displayed inside the ebook as well as let you set is_adult and user/password for different sites.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.label = QLabel('personal.ini:') + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + try: + self.ini.setFont(QFont("Courier", + self.plugin_action.gui.font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(prefs['personal.ini']) + self.l.addWidget(self.ini) + + self.defaults = QPushButton('View Defaults', self) + self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.") + self.defaults.clicked.connect(self.show_defaults) + self.l.addWidget(self.defaults) + + # self.l.insertStretch(-1) + # let edit box fill the space. + + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class ShowDefaultsIniDialog(QDialog): + + def __init__(self, icon, text, parent=None): + QDialog.__init__(self, parent) + self.resize(600, 500) + self.l = QVBoxLayout() + self.setLayout(self.l) + self.label = QLabel("Plugin Defaults (Read-Only)") + self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + self.setWindowTitle(_('Plugin Defaults')) + self.setWindowIcon(icon) + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + self.ini.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + try: + self.ini.setFont(QFont("Courier", + get_gui().font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(text) + self.ini.setReadOnly(True) + self.l.addWidget(self.ini) + + self.ok_button = QPushButton('OK', self) + self.ok_button.clicked.connect(self.hide) + self.l.addWidget(self.ok_button) + +class ListTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + rl_plugin = plugin_action.gui.iactions['Reading List'] + reading_lists = rl_plugin.get_list_names() + except KeyError: + reading_lists= [] + + label = QLabel('These settings provide integration with the Reading List Plugin. Reading List can automatically send to devices and change custom columns. 
You have to create and configure the lists in Reading List to be useful.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.addtolists = QCheckBox('Add new/updated stories to "Send to Device" Reading List(s).',self) + self.addtolists.setToolTip('Automatically add new/updated stories to these lists in the Reading List plugin.') + self.addtolists.setChecked(prefs['addtolists']) + self.l.addWidget(self.addtolists) + + horz = QHBoxLayout() + label = QLabel('"Send to Device" Reading Lists') + label.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + horz.addWidget(label) + self.send_lists_box = MultiCompleteLineEdit(self) + self.send_lists_box.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + self.send_lists_box.update_items_cache(reading_lists) + self.send_lists_box.setText(prefs['send_lists']) + horz.addWidget(self.send_lists_box) + self.l.addLayout(horz) + + self.addtoreadlists = QCheckBox('Add new/updated stories to "To Read" Reading List(s).',self) + self.addtoreadlists.setToolTip('Automatically add new/updated stories to these lists in the Reading List plugin.\nAlso offers menu option to remove stories from the "To Read" lists.') + self.addtoreadlists.setChecked(prefs['addtoreadlists']) + self.l.addWidget(self.addtoreadlists) + + horz = QHBoxLayout() + label = QLabel('"To Read" Reading Lists') + label.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + horz.addWidget(label) + self.read_lists_box = MultiCompleteLineEdit(self) + self.read_lists_box.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + self.read_lists_box.update_items_cache(reading_lists) + self.read_lists_box.setText(prefs['read_lists']) + horz.addWidget(self.read_lists_box) + self.l.addLayout(horz) + + self.addtolistsonread = QCheckBox('Add stories back to "Send to Device" Reading List(s) when marked "Read".',self) + self.addtolistsonread.setToolTip('Menu option to remove from "To Read" lists will also add stories back to "Send to Device" Reading List(s)') + self.addtolistsonread.setChecked(prefs['addtolistsonread']) + self.l.addWidget(self.addtolistsonread) + + self.l.insertStretch(-1) + +class OtherTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFictionDownLoader confirmation dialogs back again.") + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + keyboard_shortcuts_button = QPushButton('Keyboard shortcuts...', self) + keyboard_shortcuts_button.setToolTip(_( + 'Edit the keyboard shortcuts associated with this plugin')) + keyboard_shortcuts_button.clicked.connect(parent_dialog.edit_shortcuts) + self.l.addWidget(keyboard_shortcuts_button) + + reset_confirmation_button = QPushButton(_('Reset disabled &confirmation dialogs'), self) + reset_confirmation_button.setToolTip(_( + 'Reset all show me again dialogs for the FanFictionDownLoader plugin')) + reset_confirmation_button.clicked.connect(self.reset_dialogs) + self.l.addWidget(reset_confirmation_button) + + self.l.insertStretch(-1) + + def reset_dialogs(self): + for key in dynamic.keys(): + if key.startswith('fanfictiondownloader_') and 
key.endswith('_again') \ + and dynamic[key] is False: + dynamic[key] = True + info_dialog(self, _('Done'), + _('Confirmation dialogs have all been reset'), + show=True, + show_copy_button=False) + +permitted_values = { + 'int' : ['numWords','numChapters'], + 'float' : ['numWords','numChapters'], + 'bool' : ['status-C','status-I'], + 'datetime' : ['datePublished', 'dateUpdated', 'dateCreated'], + 'series' : ['series'], + 'enumeration' : ['category', + 'genre', + 'language', + 'series', + 'characters', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'warnings', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'authorId', + 'extratags', + 'title', + 'storyUrl', + 'description', + 'author', + 'authorUrl', + 'formatname' + #,'formatext' # not useful information. + #,'siteabbrev' + #,'version' + ] + } +# no point copying the whole list. +permitted_values['text'] = permitted_values['enumeration'] +permitted_values['comments'] = permitted_values['enumeration'] + +titleLabels = { + 'category':'Category', + 'genre':'Genre', + 'language':'Language', + 'status':'Status', + 'status-C':'Status:Completed', + 'status-I':'Status:In-Progress', + 'series':'Series', + 'characters':'Characters', + 'datePublished':'Published', + 'dateUpdated':'Updated', + 'dateCreated':'Packaged', + 'rating':'Rating', + 'warnings':'Warnings', + 'numChapters':'Chapters', + 'numWords':'Words', + 'site':'Site', + 'storyId':'Story ID', + 'authorId':'Author ID', + 'extratags':'Extra Tags', + 'title':'Title', + 'storyUrl':'Story URL', + 'description':'Summary', + 'author':'Author', + 'authorUrl':'Author URL', + 'formatname':'File Format', + 'formatext':'File Extension', + 'siteabbrev':'Site Abbrev', + 'version':'FFDL Version' + } + +class ColumnsTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel("If you have custom columns defined, they will be listed below. 
Choose a metadata value type to fill your columns automatically.") + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.custcol_dropdowns = {} + + custom_columns = self.plugin_action.gui.library_view.model().custom_columns + + for key, column in custom_columns.iteritems(): + + if column['datatype'] in permitted_values: + # print("\n============== %s ===========\n"%key) + # for (k,v) in column.iteritems(): + # print("column['%s'] => %s"%(k,v)) + horz = QHBoxLayout() + label = QLabel('%s(%s)'%(column['name'],key)) + label.setToolTip("Update this %s column with..."%column['datatype']) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.addItem('',QVariant('none')) + for md in permitted_values[column['datatype']]: + dropdown.addItem(titleLabels[md],QVariant(md)) + self.custcol_dropdowns[key] = dropdown + if key in prefs['custom_cols']: + dropdown.setCurrentIndex(dropdown.findData(QVariant(prefs['custom_cols'][key]))) + if column['datatype'] == 'enumeration': + dropdown.setToolTip("Metadata values valid for this type of column.\nValues that aren't valid for this enumeration column will be ignored.") + else: + dropdown.setToolTip("Metadata values valid for this type of column.") + + horz.addWidget(dropdown) + self.l.addLayout(horz) + + self.l.insertStretch(-1) + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) diff --git a/calibre-plugin/dialogs.py b/calibre-plugin/dialogs.py new file mode 100644 index 00000000..87c8e415 --- /dev/null +++ b/calibre-plugin/dialogs.py @@ -0,0 +1,663 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import traceback + +from PyQt4 import QtGui +from PyQt4.Qt import (QDialog, QTableWidget, QMessageBox, QVBoxLayout, QHBoxLayout, QGridLayout, + QPushButton, QProgressDialog, QString, QLabel, QCheckBox, QIcon, QTextCursor, + QTextEdit, QLineEdit, QInputDialog, QComboBox, QClipboard, QVariant, + QProgressDialog, QTimer, QDialogButtonBox, QPixmap, Qt, QAbstractItemView ) + +from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog +from calibre.gui2.dialogs.confirm_delete import confirm + +from calibre import confirm_config_name +from calibre.gui2 import dynamic + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters,writers,exceptions +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import (ReadOnlyTableWidgetItem, ReadOnlyTextIconWidgetItem, SizePersistedDialog, + ImageTitleLayout, get_icon) + +SKIP='Skip' +ADDNEW='Add New Book' +UPDATE='Update EPUB if New Chapters' +UPDATEALWAYS='Update EPUB Always' +OVERWRITE='Overwrite if Newer' +OVERWRITEALWAYS='Overwrite Always' +CALIBREONLY='Update Calibre Metadata Only' +collision_order=[SKIP, + ADDNEW, + UPDATE, + UPDATEALWAYS, + OVERWRITE, + OVERWRITEALWAYS, + CALIBREONLY,] + +class NotGoingToDownload(Exception): + def __init__(self,error,icon='dialog_error.png'): + self.error=error + self.icon=icon + + def __str__(self): + return self.error + +class DroppableQTextEdit(QTextEdit): + def __init__(self,parent): + QTextEdit.__init__(self,parent) + + def canInsertFromMimeData(self, source): + if source.hasUrls(): + return True; + else: + return QTextEdit.canInsertFromMimeData(self,source) + + def insertFromMimeData(self, source): + if source.hasText(): + self.append(source.text()) + else: + return 
QTextEdit.insertFromMimeData(self, source) + +class AddNewDialog(SizePersistedDialog): + + def __init__(self, gui, prefs, icon, url_list_text): + SizePersistedDialog.__init__(self, gui, 'FanFictionDownLoader plugin:add new dialog') + self.gui = gui + + if prefs['adddialogstaysontop']: + QDialog.setWindowFlags ( self, Qt.Dialog|Qt.WindowStaysOnTopHint ) + + self.setMinimumWidth(300) + self.l = QVBoxLayout() + self.setLayout(self.l) + + self.setWindowTitle('FanFictionDownLoader') + self.setWindowIcon(icon) + + self.l.addWidget(QLabel('Story URL(s), one per line:')) + self.url = DroppableQTextEdit(self) + self.url.setToolTip('URLs for stories, one per line.\nWill take URLs from clipboard, but only valid URLs.') + self.url.setLineWrapMode(QTextEdit.NoWrap) + self.url.setText(url_list_text) + self.l.addWidget(self.url) + + horz = QHBoxLayout() + label = QLabel('Output &Format:') + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + horz = QHBoxLayout() + label = QLabel('If Story Already Exists?') + label.setToolTip("What to do if there's already an existing story with the same title and author.") + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip(OVERWRITE+' will replace the existing story.\n'+ + # UPDATE+' will download new chapters only and add to existing EPUB.\n'+ + # ADDNEW+' will create a new story with the same title and author.\n'+ + # SKIP+' will not download existing stories.\n'+ + # CALIBREONLY+' will not download stories, but will update Calibre metadata.') + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox('Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update metadata for story in Calibre from web site?') + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + self.l.addWidget(button_box) + + if url_list_text: + button_box.button(QDialogButtonBox.Ok).setFocus() + + # restore saved size. 
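Editor's note: "restore saved size" is the SizePersistedDialog mechanism from common_utils.py above: dialog_closing() serialises saveGeometry() into a bytearray stored under a unique key, and resize_dialog() replays it with restoreGeometry(). A minimal sketch with a plain dict standing in for calibre's gprefs (assumes a QApplication can be created):

    from PyQt4.Qt import QApplication, QDialog

    app = QApplication([])
    store = {}  # stand-in for calibre's gprefs

    d1 = QDialog()
    d1.resize(400, 300)
    store['demo dialog'] = bytearray(d1.saveGeometry())  # as in dialog_closing()

    d2 = QDialog()
    d2.restoreGeometry(store['demo dialog'])             # as in resize_dialog()
    assert d2.size() == d1.size()
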
+ self.resize_dialog()
+ #self.resize(self.sizeHint())
+
+ def set_collisions(self):
+ prev=self.collision.currentText()
+ self.collision.clear()
+ for o in collision_order:
+ if self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]:
+ self.collision.addItem(o)
+ i = self.collision.findText(prev)
+ if i > -1:
+ self.collision.setCurrentIndex(i)
+
+ def get_ffdl_options(self):
+ return {
+ 'fileform': unicode(self.fileform.currentText()),
+ 'collision': unicode(self.collision.currentText()),
+ 'updatemeta': self.updatemeta.isChecked(),
+ }
+
+ def get_urlstext(self):
+ return unicode(self.url.toPlainText())
+
+
+class FakeLineEdit():
+ def __init__(self):
+ pass
+
+ def text(self):
+ pass
+
+class UserPassDialog(QDialog):
+ '''
+ Need to collect User/Pass for some sites.
+ '''
+ def __init__(self, gui, site, exception=None):
+ QDialog.__init__(self, gui)
+ self.gui = gui
+ self.status=False
+
+ self.l = QGridLayout()
+ self.setLayout(self.l)
+
+ if exception.passwdonly:
+ self.setWindowTitle('Password')
+ self.l.addWidget(QLabel("Author requires a password for this story (%s)."%exception.url),0,0,1,2)
+ # user isn't used, but it's easier to still have it for
+ # post processing.
+ self.user = FakeLineEdit()
+ else:
+ self.setWindowTitle('User/Password')
+ self.l.addWidget(QLabel("%s requires you to log in to download this story."%site),0,0,1,2)
+
+ self.l.addWidget(QLabel("User:"),1,0)
+ self.user = QLineEdit(self)
+ self.l.addWidget(self.user,1,1)
+
+ self.l.addWidget(QLabel("Password:"),2,0)
+ self.passwd = QLineEdit(self)
+ self.passwd.setEchoMode(QLineEdit.Password)
+ self.l.addWidget(self.passwd,2,1)
+
+ self.ok_button = QPushButton('OK', self)
+ self.ok_button.clicked.connect(self.ok)
+ self.l.addWidget(self.ok_button,3,0)
+
+ self.cancel_button = QPushButton('Cancel', self)
+ self.cancel_button.clicked.connect(self.cancel)
+ self.l.addWidget(self.cancel_button,3,1)
+
+ self.resize(self.sizeHint())
+
+ def ok(self):
+ self.status=True
+ self.hide()
+
+ def cancel(self):
+ self.status=False
+ self.hide()
+
+class LoopProgressDialog(QProgressDialog):
+ '''
+ ProgressDialog displayed while fetching metadata for each story.
+ '''
+ def __init__(self, gui,
+ book_list,
+ foreach_function,
+ finish_function,
+ init_label="Fetching metadata for stories...",
+ win_title="Downloading metadata for stories",
+ status_prefix="Fetched metadata for"):
+ QProgressDialog.__init__(self,
+ init_label,
+ QString(), 0, len(book_list), gui)
+ self.setWindowTitle(win_title)
+ self.setMinimumWidth(500)
+ self.gui = gui
+ self.book_list = book_list
+ self.foreach_function = foreach_function
+ self.finish_function = finish_function
+ self.status_prefix = status_prefix
+ self.i = 0
+
+ ## do_loop re-arms itself with QTimer.singleShot, so each iteration
+ ## runs as a separate event-loop callback and the dialog stays
+ ## responsive between stories (see the editorial sketch below). 
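Editor's note: a self-contained sketch of that singleShot pattern, stripped of the plugin specifics. Each pass through step() is queued as a fresh event-loop callback, so Qt can repaint the progress dialog and deliver Cancel clicks between items:

    from PyQt4.Qt import QApplication, QTimer

    app = QApplication([])
    items = ['a', 'b', 'c']
    state = {'i': 0}

    def step():
        if state['i'] >= len(items):
            app.quit()              # finished: fall out of the event loop
            return
        print('processing %s' % items[state['i']])
        state['i'] += 1
        QTimer.singleShot(0, step)  # queue the next iteration

    QTimer.singleShot(0, step)
    app.exec_()
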
+ QTimer.singleShot(0, self.do_loop) + self.exec_() + + def updateStatus(self): + self.setLabelText("%s %d of %d"%(self.status_prefix,self.i+1,len(self.book_list))) + self.setValue(self.i+1) + print(self.labelText()) + + def do_loop(self): + + if self.i == 0: + self.setValue(0) + + book = self.book_list[self.i] + try: + ## collision spec passed into getadapter by partial from ffdl_plugin + ## no retval only if it exists, but collision is SKIP + self.foreach_function(book) + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + print("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + self.updateStatus() + self.i += 1 + + if self.i >= len(self.book_list) or self.wasCanceled(): + return self.do_when_finished() + else: + QTimer.singleShot(0, self.do_loop) + + def do_when_finished(self): + self.hide() + self.gui = None + # Queues a job to process these books in the background. + self.finish_function(self.book_list) + +class AboutDialog(QDialog): + + def __init__(self, parent, icon, text): + QDialog.__init__(self, parent) + self.resize(400, 250) + self.l = QGridLayout() + self.setLayout(self.l) + self.logo = QLabel() + self.logo.setMaximumWidth(110) + self.logo.setPixmap(QPixmap(icon.pixmap(100,100))) + self.label = QLabel(text) + self.label.setOpenExternalLinks(True) + self.label.setWordWrap(True) + self.setWindowTitle(_('About FanFictionDownLoader')) + self.setWindowIcon(icon) + self.l.addWidget(self.logo, 0, 0) + self.l.addWidget(self.label, 0, 1) + self.bb = QDialogButtonBox(self) + b = self.bb.addButton(_('OK'), self.bb.AcceptRole) + b.setDefault(True) + self.l.addWidget(self.bb, 2, 0, 1, -1) + self.bb.accepted.connect(self.accept) + +class IconWidgetItem(ReadOnlyTextIconWidgetItem): + def __init__(self, text, icon, sort_key): + ReadOnlyTextIconWidgetItem.__init__(self, text, icon) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class AuthorTableWidgetItem(ReadOnlyTableWidgetItem): + def __init__(self, text, sort_key): + ReadOnlyTableWidgetItem.__init__(self, text) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class UpdateExistingDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + save_size_name='fanfictiondownloader_plugin:update list dialog'): + SizePersistedDialog.__init__(self, gui, save_size_name) + self.gui = gui + + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + books_layout = QHBoxLayout() + layout.addLayout(books_layout) + + self.books_table = StoryListTableWidget(self) + books_layout.addWidget(self.books_table) + + button_layout = QVBoxLayout() + books_layout.addLayout(button_layout) + # self.move_up_button = QtGui.QToolButton(self) + # self.move_up_button.setToolTip('Move selected books up the list') + # self.move_up_button.setIcon(QIcon(I('arrow-up.png'))) + # self.move_up_button.clicked.connect(self.books_table.move_rows_up) + # button_layout.addWidget(self.move_up_button) + spacerItem = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + 
button_layout.addItem(spacerItem) + self.remove_button = QtGui.QToolButton(self) + self.remove_button.setToolTip('Remove selected books from the list') + self.remove_button.setIcon(get_icon('list_remove.png')) + self.remove_button.clicked.connect(self.remove_from_list) + button_layout.addWidget(self.remove_button) + spacerItem1 = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem1) + # self.move_down_button = QtGui.QToolButton(self) + # self.move_down_button.setToolTip('Move selected books down the list') + # self.move_down_button.setIcon(QIcon(I('arrow-down.png'))) + # self.move_down_button.clicked.connect(self.books_table.move_rows_down) + # button_layout.addWidget(self.move_down_button) + + options_layout = QHBoxLayout() + + label = QLabel('Output &Format:') + options_layout.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + options_layout.addWidget(self.fileform) + + label = QLabel('Update Mode:') + label.setToolTip("What sort of update to perform. May set default from plugin configuration.") + options_layout.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip('Overwrite will replace the existing story. Add New will create a new story with the same title and author.') + label.setBuddy(self.collision) + options_layout.addWidget(self.collision) + + self.updatemeta = QCheckBox('Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update metadata for story in Calibre from web site? 
May set default from plugin configuration.') + self.updatemeta.setChecked(prefs['updatemeta']) + options_layout.addWidget(self.updatemeta) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if o not in [ADDNEW,SKIP] and \ + (self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]): + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def remove_from_list(self): + self.books_table.remove_selected_rows() + + def get_books(self): + return self.books_table.get_books() + + def get_ffdl_options(self): + return { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + } + +def display_story_list(gui, header, prefs, icon, books, + label_text='', + save_size_name='fanfictiondownloader_plugin:display list dialog', + offer_skip=False): + all_good = True + for b in books: + if not b['good']: + all_good=False + break + + ## + if all_good and not dynamic.get(confirm_config_name(save_size_name), True): + return True + pass + ## fake accept? + d = DisplayStoryListDialog(gui, header, prefs, icon, books, + label_text, + save_size_name, + offer_skip and all_good) + d.exec_() + return d.result() == d.Accepted + +class DisplayStoryListDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + label_text='', + save_size_name='fanfictiondownloader_plugin:display list dialog', + offer_skip=False): + SizePersistedDialog.__init__(self, gui, save_size_name) + self.name = save_size_name + self.gui = gui + + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + + self.books_table = StoryListTableWidget(self) + layout.addWidget(self.books_table) + + options_layout = QHBoxLayout() + self.label = QLabel(label_text) + #self.label.setOpenExternalLinks(True) + #self.label.setWordWrap(True) + options_layout.addWidget(self.label) + + if offer_skip: + spacerItem1 = QtGui.QSpacerItem(2, 4, QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Minimum) + options_layout.addItem(spacerItem1) + self.again = QCheckBox('Show this again?',self) + self.again.setChecked(True) + self.again.stateChanged.connect(self.toggle) + self.again.setToolTip('Uncheck to skip review and update stories immediately when no problems.') + options_layout.addWidget(self.again) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def get_books(self): + return self.books_table.get_books() + + def toggle(self, *args): + dynamic[confirm_config_name(self.name)] = self.again.isChecked() + + + +class 
StoryListTableWidget(QTableWidget): + + def __init__(self, parent): + QTableWidget.__init__(self, parent) + self.setSelectionBehavior(QAbstractItemView.SelectRows) + + def populate_table(self, books): + self.clear() + self.setAlternatingRowColors(True) + self.setRowCount(len(books)) + header_labels = ['','Title', 'Author', 'URL', 'Comment'] + self.setColumnCount(len(header_labels)) + self.setHorizontalHeaderLabels(header_labels) + self.horizontalHeader().setStretchLastSection(True) + #self.verticalHeader().setDefaultSectionSize(24) + self.verticalHeader().hide() + + self.books={} + for row, book in enumerate(books): + self.populate_table_row(row, book) + self.books[row] = book + + # turning True breaks up/down. Do we need either sorting or up/down? + self.setSortingEnabled(True) + self.resizeColumnsToContents() + self.setMinimumColumnWidth(1, 100) + self.setMinimumColumnWidth(2, 100) + self.setMinimumColumnWidth(3, 100) + self.setMinimumSize(300, 0) + # if len(books) > 0: + # self.selectRow(0) + self.sortItems(1) + self.sortItems(0) + + def setMinimumColumnWidth(self, col, minimum): + if self.columnWidth(col) < minimum: + self.setColumnWidth(col, minimum) + + def populate_table_row(self, row, book): + if book['good']: + icon = get_icon('ok.png') + val = 0 + else: + icon = get_icon('minus.png') + val = 1 + if 'icon' in book: + icon = get_icon(book['icon']) + + status_cell = IconWidgetItem(None,icon,val) + status_cell.setData(Qt.UserRole, QVariant(val)) + self.setItem(row, 0, status_cell) + + title_cell = ReadOnlyTableWidgetItem(book['title']) + title_cell.setData(Qt.UserRole, QVariant(row)) + self.setItem(row, 1, title_cell) + + self.setItem(row, 2, AuthorTableWidgetItem(book['author'], book['author_sort'])) + + url_cell = ReadOnlyTableWidgetItem(book['url']) + #url_cell.setData(Qt.UserRole, QVariant(book['url'])) + self.setItem(row, 3, url_cell) + + comment_cell = ReadOnlyTableWidgetItem(book['comment']) + #comment_cell.setData(Qt.UserRole, QVariant(book)) + self.setItem(row, 4, comment_cell) + + def get_books(self): + books = [] + #print("=========================\nbooks:%s"%self.books) + for row in range(self.rowCount()): + rnum = self.item(row, 1).data(Qt.UserRole).toPyObject() + book = self.books[rnum] + books.append(book) + return books + + def remove_selected_rows(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + message = '
Are you sure you want to remove this book from the list?'
+ if len(rows) > 1:
+ message = '
      Are you sure you want to remove the selected %d books from the list?'%len(rows) + if not confirm(message,'fanfictiondownloader_delete_item', self): + return + first_sel_row = self.currentRow() + for selrow in reversed(rows): + self.removeRow(selrow.row()) + if first_sel_row < self.rowCount(): + self.select_and_scroll_to_row(first_sel_row) + elif self.rowCount() > 0: + self.select_and_scroll_to_row(first_sel_row - 1) + + def select_and_scroll_to_row(self, row): + self.selectRow(row) + self.scrollToItem(self.currentItem()) + + def move_rows_up(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + first_sel_row = rows[0].row() + if first_sel_row <= 0: + return + # Workaround for strange selection bug in Qt which "alters" the selection + # in certain circumstances which meant move down only worked properly "once" + selrows = [] + for row in rows: + selrows.append(row.row()) + selrows.sort() + for selrow in selrows: + self.swap_row_widgets(selrow - 1, selrow + 1) + scroll_to_row = first_sel_row - 1 + if scroll_to_row > 0: + scroll_to_row = scroll_to_row - 1 + self.scrollToItem(self.item(scroll_to_row, 0)) + + def move_rows_down(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + last_sel_row = rows[-1].row() + if last_sel_row == self.rowCount() - 1: + return + # Workaround for strange selection bug in Qt which "alters" the selection + # in certain circumstances which meant move down only worked properly "once" + selrows = [] + for row in rows: + selrows.append(row.row()) + selrows.sort() + for selrow in reversed(selrows): + self.swap_row_widgets(selrow + 2, selrow) + scroll_to_row = last_sel_row + 1 + if scroll_to_row < self.rowCount() - 1: + scroll_to_row = scroll_to_row + 1 + self.scrollToItem(self.item(scroll_to_row, 0)) + + def swap_row_widgets(self, src_row, dest_row): + self.blockSignals(True) + self.insertRow(dest_row) + for col in range(0, self.columnCount()): + self.setItem(dest_row, col, self.takeItem(src_row, col)) + self.removeRow(src_row) + self.blockSignals(False) diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py new file mode 100644 index 00000000..1c73fc3a --- /dev/null +++ b/calibre-plugin/ffdl_plugin.py @@ -0,0 +1,1007 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Jim Miller' +__docformat__ = 'restructuredtext en' + +import time, os, copy, threading +from ConfigParser import SafeConfigParser +from StringIO import StringIO +from functools import partial +from datetime import datetime + +from PyQt4.Qt import (QApplication, QMenu, QToolButton) + +from PyQt4.Qt import QPixmap, Qt +from PyQt4.QtCore import QBuffer + + +from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory, remove_dir +from calibre.ebooks.metadata import MetaInformation, authors_to_string +from calibre.ebooks.metadata.meta import get_metadata +from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog +from calibre.gui2.dialogs.message_box import ViewLog +from calibre.gui2.dialogs.confirm_delete import confirm +from calibre.utils.date import local_tz + +# The class that all interface action plugins must inherit from +from calibre.gui2.actions import InterfaceAction + +from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin_icon_resources, 
get_icon, + create_menu_action_unique, get_library_uuid) + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount + +from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values) +from calibre_plugins.fanfictiondownloader_plugin.dialogs import ( + AddNewDialog, UpdateExistingDialog, display_story_list, DisplayStoryListDialog, + LoopProgressDialog, UserPassDialog, AboutDialog, + OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, + NotGoingToDownload ) + +# because calibre immediately transforms html into zip and don't want +# to have an 'if html'. db.has_format is cool with the case mismatch, +# but if I'm doing it anyway... +formmapping = { + 'epub':'EPUB', + 'mobi':'MOBI', + 'html':'ZIP', + 'txt':'TXT' + } + +PLUGIN_ICONS = ['images/icon.png'] + +class FanFictionDownLoaderPlugin(InterfaceAction): + + name = 'FanFictionDownLoader' + + # Declare the main action associated with this plugin + # The keyboard shortcut can be None if you dont want to use a keyboard + # shortcut. Remember that currently calibre has no central management for + # keyboard shortcuts, so try to use an unusual/unused shortcut. + # (text, icon_path, tooltip, keyboard shortcut) + # icon_path isn't in the zip--icon loaded below. + action_spec = (name, None, + 'Download FanFiction stories from various web sites', ()) + # None for keyboard shortcut doesn't allow shortcut. () does, there just isn't one yet + + action_type = 'global' + # make button menu drop down only + #popup_type = QToolButton.InstantPopup + + def genesis(self): + + # This method is called once per plugin, do initial setup here + + # Read the plugin icons and store for potential sharing with the config widget + icon_resources = self.load_resources(PLUGIN_ICONS) + set_plugin_icon_resources(self.name, icon_resources) + + base = self.interface_action_base_plugin + self.version = base.name+" v%d.%d.%d"%base.version + + # Set the icon for this interface action + # The get_icons function is a builtin function defined for all your + # plugin code. It loads icons from the plugin zip file. It returns + # QIcon objects, if you want the actual data, use the analogous + # get_resources builtin function. + + # Note that if you are loading more than one icon, for performance, you + # should pass a list of names to get_icons. In this case, get_icons + # will return a dictionary mapping names to QIcons. Names that + # are not found in the zip file will result in null QIcons. + icon = get_icon('images/icon.png') + + #self.qaction.setText('FFDL') + + # The qaction is automatically created from the action_spec defined + # above + self.qaction.setIcon(icon) + + # Call function when plugin triggered. + self.qaction.triggered.connect(self.plugin_button) + + # Assign our menu to this action + self.menu = QMenu(self.gui) + self.old_actions_unique_map = {} + # menu_actions is just to keep a live reference to the menu + # items to prevent GC removing it. + self.menu_actions = [] + self.qaction.setMenu(self.menu) + self.menu.aboutToShow.connect(self.about_to_show_menu) + + self.menus_lock = threading.RLock() + + def initialization_complete(self): + # otherwise configured hot keys won't work until the menu's + # been displayed once. 
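Editor's note: rebuild_menus() is invoked from several hooks because calibre only registers an action's keyboard shortcuts once its menu has actually been built. A reduced sketch of the InterfaceAction life cycle this plugin follows (hypothetical plugin class, not part of the patch):

    from PyQt4.Qt import QMenu
    from calibre.gui2.actions import InterfaceAction

    class DemoAction(InterfaceAction):
        name = 'Demo'
        action_spec = ('Demo', None, 'demo tooltip', ())

        def genesis(self):
            # one-time setup: menu object plus toolbar-button handler
            self.menu = QMenu(self.gui)
            self.qaction.setMenu(self.menu)
            self.qaction.triggered.connect(self.toolbar_clicked)

        def initialization_complete(self):
            self.rebuild_menus()  # make shortcuts usable before the first display

        def library_changed(self, db):
            self.rebuild_menus()  # per-library prefs can change the menu

        def rebuild_menus(self):
            self.menu.clear()     # then repopulate from current prefs

        def toolbar_clicked(self):
            pass
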
+ self.rebuild_menus() + + def about_to_show_menu(self): + self.rebuild_menus() + + def library_changed(self, db): + # We need to reset our menus after switching libraries + self.rebuild_menus() + + def rebuild_menus(self): + with self.menus_lock: + # Show the config dialog + # The config dialog can also be shown from within + # Preferences->Plugins, which is why the do_user_config + # method is defined on the base plugin class + do_user_config = self.interface_action_base_plugin.do_user_config + self.menu.clear() + self.actions_unique_map = {} + self.menu_actions = [] + self.add_action = self.create_menu_item_ex(self.menu, '&Add New from URL(s)', image='plus.png', + unique_name='Add New FanFiction Book(s) from URL(s)', + shortcut_name='Add New FanFiction Book(s) from URL(s)', + triggered=self.add_dialog ) + + self.update_action = self.create_menu_item_ex(self.menu, '&Update Existing FanFiction Book(s)', image='plusplus.png', + unique_name='Update Existing FanFiction Book(s)', + shortcut_name='Update Existing FanFiction Book(s)', + triggered=self.update_existing) + + if 'Reading List' in self.gui.iactions and (prefs['addtolists'] or prefs['addtoreadlists']) : + self.menu.addSeparator() + addmenutxt, rmmenutxt = None, None + if prefs['addtolists'] and prefs['addtoreadlists'] : + addmenutxt = 'Add to "To Read" and "Send to Device" Lists' + if prefs['addtolistsonread']: + rmmenutxt = 'Remove from "To Read" and add to "Send to Device" Lists' + else: + rmmenutxt = 'Remove from "To Read" Lists' + elif prefs['addtolists'] : + addmenutxt = 'Add Selected to "Send to Device" Lists' + elif prefs['addtoreadlists']: + addmenutxt = 'Add to "To Read" Lists' + rmmenutxt = 'Remove from "To Read" Lists' + + if addmenutxt: + self.add_send_action = self.create_menu_item_ex(self.menu, addmenutxt, image='plusplus.png', + unique_name=addmenutxt, + shortcut_name=addmenutxt, + triggered=partial(self.update_lists,add=True)) + + if rmmenutxt: + self.add_remove_action = self.create_menu_item_ex(self.menu, rmmenutxt, image='minusminus.png', + unique_name=rmmenutxt, + shortcut_name=rmmenutxt, + triggered=partial(self.update_lists,add=False)) + + # try: + # self.add_send_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # except: + # pass + # try: + # self.add_remove_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # except: + # pass + + self.menu.addSeparator() + self.get_list_action = self.create_menu_item_ex(self.menu, 'Get URLs from Selected Books', image='bookmarks.png', + unique_name='Get URLs from Selected Books', + shortcut_name='Get URLs from Selected Books', + triggered=self.get_list_urls) + + self.menu.addSeparator() + self.config_action = create_menu_action_unique(self, self.menu, '&Configure Plugin', shortcut=False, + image= 'config.png', + unique_name='Configure FanFictionDownLoader', + shortcut_name='Configure FanFictionDownLoader', + triggered=partial(do_user_config,parent=self.gui)) + + self.config_action = create_menu_action_unique(self, self.menu, '&About Plugin', shortcut=False, + image= 'images/icon.png', + unique_name='About FanFictionDownLoader', + shortcut_name='About FanFictionDownLoader', + triggered=self.about) + + # Before we finalize, make sure we delete any actions for menus that are no longer displayed + for menu_id, unique_name in self.old_actions_unique_map.iteritems(): + if menu_id not in self.actions_unique_map: + self.gui.keyboard.unregister_shortcut(unique_name) + self.old_actions_unique_map = self.actions_unique_map + 
self.gui.keyboard.finalize() + + def about(self): + # Get the about text from a file inside the plugin zip file + # The get_resources function is a builtin function defined for all your + # plugin code. It loads files from the plugin zip file. It returns + # the bytes from the specified file. + # + # Note that if you are loading more than one file, for performance, you + # should pass a list of names to get_resources. In this case, + # get_resources will return a dictionary mapping names to bytes. Names that + # are not found in the zip file will not be in the returned dictionary. + + text = get_resources('about.txt') + AboutDialog(self.gui,self.qaction.icon(),self.version + text).exec_() + + def create_menu_item_ex(self, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ac = create_menu_action_unique(self, parent_menu, menu_text, image, tooltip, + shortcut, triggered, is_checked, shortcut_name, unique_name) + self.actions_unique_map[ac.calibre_shortcut_unique_name] = ac.calibre_shortcut_unique_name + self.menu_actions.append(ac) + return ac + + def plugin_button(self): + if len(self.gui.library_view.get_selected_ids()) > 0 and prefs['updatedefault']: + self.update_existing() + else: + self.add_dialog() + + def update_lists(self,add=True): + if len(self.gui.library_view.get_selected_ids()) > 0 and \ + (prefs['addtolists'] or prefs['addtoreadlists']) : + self._update_reading_lists(self.gui.library_view.get_selected_ids(),add) + + def get_list_urls(self): + if len(self.gui.library_view.get_selected_ids()) > 0: + book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) + + LoopProgressDialog(self.gui, + book_list, + partial(self._get_story_url_for_list, db=self.gui.current_db), + self._finish_get_list_urls, + init_label="Collecting URLs for stories...", + win_title="Get URLs for stories", + status_prefix="URL retrieved") + + def _get_story_url_for_list(self,book,db=None): + book['url'] = self._get_story_url(db,book['calibre_id']) + if book['url'] == None: + book['good']=False + else: + book['good']=True + + def _finish_get_list_urls(self, book_list): + url_list = [ x['url'] for x in book_list if x['good'] ] + if url_list: + d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui) + d.setWindowIcon(get_icon('bookmarks.png')) + d.exec_() + else: + info_dialog(self.gui, _('List of URLs'), + _('No URLs found in selected books.'), + show=True, + show_copy_button=False) + + def add_dialog(self): + + #print("add_dialog()") + + url_list = self.get_urls_clip() + url_list_text = "\n".join(url_list) + + # self.gui is the main calibre GUI. It acts as the gateway to access + # all the elements of the calibre user interface, it should also be the + # parent of the dialog + # AddNewDialog just collects URLs, format and presents buttons. 
+ d = AddNewDialog(self.gui, + prefs, + self.qaction.icon(), + url_list_text, + ) + d.exec_() + if d.result() != d.Accepted: + return + + url_list = get_url_list(d.get_urlstext()) + add_books = self._convert_urls_to_books(url_list) + #print("add_books:%s"%add_books) + #print("options:%s"%d.get_ffdl_options()) + + options = d.get_ffdl_options() + options['version'] = self.version + print(self.version) + + self.start_downloads( options, add_books ) + + def update_existing(self): + if len(self.gui.library_view.get_selected_ids()) == 0: + return + #print("update_existing()") + + db = self.gui.current_db + book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) + #book_ids = self.gui.library_view.get_selected_ids() + + LoopProgressDialog(self.gui, + book_list, + partial(self._populate_book_from_calibre_id, db=self.gui.current_db), + self._update_existing_2, + init_label="Collecting stories for update...", + win_title="Get stories for updates", + status_prefix="URL retrieved") + + #books = self._convert_calibre_ids_to_books(db, book_ids) + #print("update books:%s"%books) + + def _update_existing_2(self,book_list): + + d = UpdateExistingDialog(self.gui, + 'Update Existing List', + prefs, + self.qaction.icon(), + book_list, + ) + d.exec_() + if d.result() != d.Accepted: + return + + update_books = d.get_books() + + #print("update_books:%s"%update_books) + #print("options:%s"%d.get_ffdl_options()) + # only if there's some good ones. + if 0 < len(filter(lambda x : x['good'], update_books)): + options = d.get_ffdl_options() + options['version'] = self.version + print(self.version) + self.start_downloads( options, update_books ) + + def get_urls_clip(self): + url_list = [] + if prefs['urlsfromclip']: + for url in unicode(QApplication.instance().clipboard().text()).split(): + if( self._is_good_downloader_url(url) ): + url_list.append(url) + return url_list + + def apply_settings(self): + # No need to do anything with perfs here, but we could. + prefs + + def start_downloads(self, options, books): + + #print("start_downloads:%s"%books) + + # create and pass temp dir. + tdir = PersistentTemporaryDirectory(prefix='fanfictiondownloader_') + options['tdir']=tdir + + self.gui.status_bar.show_message(_('Started fetching metadata for %s stories.'%len(books)), 3000) + + if 0 < len(filter(lambda x : x['good'], books)): + LoopProgressDialog(self.gui, + books, + partial(self.get_metadata_for_book, options = options), + partial(self.start_download_list, options = options)) + # LoopProgressDialog calls get_metadata_for_book for each 'good' story, + # get_metadata_for_book updates book for each, + # LoopProgressDialog calls start_download_list at the end which goes + # into the BG, or shows list if no 'good' books. + + def get_metadata_for_book(self,book, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + ''' + Update passed in book dict with metadata from website and + necessary data. To be called from LoopProgressDialog + 'loop'. Also pops dialogs for is adult, user/pass. + ''' + + # The current database shown in the GUI + # db is an instance of the class LibraryDatabase2 from database.py + # This class has many, many methods that allow you to do a lot of + # things. + db = self.gui.current_db + + fileform = options['fileform'] + collision = options['collision'] + updatemeta= options['updatemeta'] + + if not book['good']: + # book has already been flagged bad for whatever reason. 
+            return
+
+        url = book['url']
+        print("url:%s"%url)
+        skip_date_update = False
+
+        ## was self.ffdlconfig, but we need to be able to change it
+        ## when doing epub update.
+        ffdlconfig = SafeConfigParser()
+        ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
+        ffdlconfig.readfp(StringIO(prefs['personal.ini']))
+        adapter = adapters.getAdapter(ffdlconfig,url,fileform)
+
+        options['personal.ini'] = prefs['personal.ini']
+        if prefs['includeimages']:
+            # this is a cheat to make it easier for users.
+            options['personal.ini'] = '''[epub]
+include_images:true
+keep_summary_html:true
+make_firstimage_cover:true
+''' + options['personal.ini']
+
+        ## three tries, that's enough if both user/pass & is_adult needed,
+        ## or a couple tries of one or the other
+        for x in range(0,2):
+            try:
+                adapter.getStoryMetadataOnly()
+            except exceptions.FailedToLogin, f:
+                print("Login Failed, Need Username/Password.")
+                userpass = UserPassDialog(self.gui,url,f)
+                userpass.exec_() # exec_ will make it act modal
+                if userpass.status:
+                    adapter.username = userpass.user.text()
+                    adapter.password = userpass.passwd.text()
+
+            except exceptions.AdultCheckRequired:
+                if question_dialog(self.gui, 'Are You Adult?', '<p>
      '+ + "%s requires that you be an adult. Please confirm you are an adult in your locale:"%url, + show_copy_button=False): + adapter.is_adult=True + + # let other exceptions percolate up. + story = adapter.getStoryMetadataOnly() + writer = writers.getWriter(options['fileform'],adapter.config,adapter) + + book['all_metadata'] = story.getAllMetadata(removeallentities=True) + book['title'] = story.getMetadata("title", removeallentities=True) + book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True) + book['publisher'] = story.getMetadata("site") + book['tags'] = writer.getTags() # getTags could be moved up into adapter now. Adapter didn't used to know the fileform + book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better. + book['series'] = story.getMetadata("series") + + # adapter.opener is the element with a threadlock. But del + # adapter.opener doesn't work--subproc fails when it tries + # to pull in the adapter object that hasn't been imported yet. + # book['adapter'] = adapter + + book['is_adult'] = adapter.is_adult + book['username'] = adapter.username + book['password'] = adapter.password + + book['icon'] = 'plus.png' + if story.getMetadataRaw('datePublished'): + # should only happen when an adapter is broken, but better to + # fail gracefully. + book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz) + book['timestamp'] = None # filled below if not skipped. + + if collision in (CALIBREONLY): + book['icon'] = 'metadata.png' + + # Dialogs should prevent this case now. + if collision in (UPDATE,UPDATEALWAYS) and fileform != 'epub': + raise NotGoingToDownload("Cannot update non-epub format.") + + book_id = None + + if book['calibre_id'] != None: + # updating an existing book. Update mode applies. + print("update existing id:%s"%book['calibre_id']) + book_id = book['calibre_id'] + # No handling needed: OVERWRITEALWAYS,CALIBREONLY + + # only care about collisions when not ADDNEW + elif collision != ADDNEW: + # 'new' book from URL. collision handling applies. + print("from URL") + + # find dups + mi = MetaInformation(story.getMetadata("title", removeallentities=True), + (story.getMetadata("author", removeallentities=True),)) # author is a list. + identicalbooks = db.find_identical_books(mi) + ## removed for being overkill. + # for ib in identicalbooks: + # # only *really* identical if URL matches, too. + # # XXX make an option? + # if self._get_story_url(db,ib) == url: + # identicalbooks.append(ib) + #print("identicalbooks:%s"%identicalbooks) + + if collision == SKIP and identicalbooks: + raise NotGoingToDownload("Skipping duplicate story.","list_remove.png") + + if len(identicalbooks) > 1: + raise NotGoingToDownload("More than one identical book--can't tell which to update/overwrite.","minusminus.png") + + ## changed: add new book when CALIBREONLY if none found. + if collision == CALIBREONLY and not identicalbooks: + collision = ADDNEW + options['collision'] = ADDNEW + # raise NotGoingToDownload("Not updating Calibre Metadata, no existing book to update.","search_delete_saved.png") + + if len(identicalbooks)>0: + book_id = identicalbooks.pop() + book['calibre_id'] = book_id + book['icon'] = 'edit-redo.png' + + if book_id != None and collision != ADDNEW: + if collision in (CALIBREONLY): + book['comment'] = 'Metadata collected.' + # don't need temp file created below. 
+ return + + ## newer/chaptercount checks are the same for both: + # Update epub, but only if more chapters. + if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE + # 'book' can exist without epub. If there's no existing epub, + # let it go and it will download it. + if db.has_format(book_id,fileform,index_is_id=True): + (epuburl,chaptercount) = \ + get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB', + index_is_id=True))) + urlchaptercount = int(story.getMetadata('numChapters')) + if chaptercount == urlchaptercount: + if collision == UPDATE: + raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png') + else: + # UPDATEALWAYS + skip_date_update = True + elif chaptercount > urlchaptercount: + raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png') + + if collision == OVERWRITE and \ + db.has_format(book_id,formmapping[fileform],index_is_id=True): + # check make sure incoming is newer. + lastupdated=story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.fromtimestamp(os.stat(db.format_abspath(book_id, formmapping[fileform], index_is_id=True))[8]).date() + if fileupdated > lastupdated: + raise NotGoingToDownload("Not Overwriting, web site is not newer.",'edit-undo.png') + + # For update, provide a tmp file copy of the existing epub so + # it can't change underneath us. + if collision in (UPDATE,UPDATEALWAYS) and \ + db.has_format(book['calibre_id'],'EPUB',index_is_id=True): + tmp = PersistentTemporaryFile(prefix='old-%s-'%book['calibre_id'], + suffix='.epub', + dir=options['tdir']) + db.copy_format_to(book_id,fileform,tmp,index_is_id=True) + print("existing epub tmp:"+tmp.name) + book['epub_for_update'] = tmp.name + + if collision != CALIBREONLY and not skip_date_update: + # I'm half convinced this should be dateUpdated instead, but + # this behavior matches how epubs come out when imported + # dateCreated == packaged--epub/etc created. + book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz) + + if book['good']: # there shouldn't be any !'good' books at this point. + # if still 'good', make a temp file to write the output to. + tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'], + suffix='.'+options['fileform'], + dir=options['tdir']) + print("title:"+book['title']) + print("outfile:"+tmp.name) + book['outfile'] = tmp.name + + return + + def start_download_list(self,book_list, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + ''' + Called by LoopProgressDialog to start story downloads BG processing. + adapter_list is a list of tuples of (url,adapter) + ''' + #print("start_download_list:book_list:%s"%book_list) + + ## No need to BG process when CALIBREONLY! Fake it. + if options['collision'] in (CALIBREONLY): + class NotJob(object): + def __init__(self,result): + self.failed=False + self.result=result + notjob = NotJob(book_list) + self.download_list_completed(notjob,options=options) + return + + for book in book_list: + if book['good']: + break + else: + ## No good stories to try to download, go straight to + ## list. + d = DisplayStoryListDialog(self.gui, + 'Nothing to Download', + prefs, + self.qaction.icon(), + book_list, + label_text='None of the URLs/stories given can be/need to be downloaded.' 
+                                       )
+            d.exec_()
+            return
+
+        func = 'arbitrary_n'
+        cpus = self.gui.job_manager.server.pool_size
+        args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker',
+                (book_list, options, cpus)]
+        desc = 'Download FanFiction Book'
+        job = self.gui.job_manager.run_job(
+                self.Dispatcher(partial(self.download_list_completed,options=options)),
+                func, args=args,
+                description=desc)
+
+        self.gui.status_bar.show_message('Starting %d FanFictionDownLoads'%len(book_list),3000)
+
+    def _update_book(self,book,db=None,
+                     options={'fileform':'epub',
+                              'collision':ADDNEW,
+                              'updatemeta':True}):
+        print("add/update %s %s"%(book['title'],book['url']))
+        mi = self._make_mi_from_book(book)
+
+        if options['collision'] != CALIBREONLY:
+            self._add_or_update_book(book,options,prefs,mi)
+
+        if options['collision'] == CALIBREONLY or \
+                (options['updatemeta'] and book['good']):
+            self._update_metadata(db, book['calibre_id'], book, mi, options)
+
+    def _update_books_completed(self, book_list, options={}):
+
+        add_list = filter(lambda x : x['good'] and x['added'], book_list)
+        add_ids = [ x['calibre_id'] for x in add_list ]
+        update_list = filter(lambda x : x['good'] and not x['added'], book_list)
+        update_ids = [ x['calibre_id'] for x in update_list ]
+
+        if len(add_list):
+            ## even shows up added to searches. Nice.
+            self.gui.library_view.model().books_added(len(add_list))
+            self.gui.library_view.model().refresh_ids(add_ids)
+
+        if update_ids:
+            self.gui.library_view.model().refresh_ids(update_ids)
+
+        current = self.gui.library_view.currentIndex()
+        self.gui.library_view.model().current_changed(current, self.previous)
+        self.gui.tags_view.recount()
+
+        if self.gui.cover_flow:
+            self.gui.cover_flow.dataChanged()
+
+        self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.'%(len(update_list) + len(add_list))), 3000)
+
+        if len(update_list) + len(add_list) != len(book_list):
+            d = DisplayStoryListDialog(self.gui,
+                                       'Updates completed, final status',
+                                       prefs,
+                                       self.qaction.icon(),
+                                       book_list,
+                                       label_text='Stories have been added or updated in Calibre, some had additional problems.'
+                                       )
+            d.exec_()
+
+        print("all done, remove temp dir.")
+        remove_dir(options['tdir'])
+
+    def download_list_completed(self, job, options={}):
+        if job.failed:
+            self.gui.job_exception(job, dialog_title='Failed to Download Stories')
+            return
+
+        self.previous = self.gui.library_view.currentIndex()
+        db = self.gui.current_db
+
+        if display_story_list(self.gui,
+                              'Downloads finished, confirm to update Calibre',
+                              prefs,
+                              self.qaction.icon(),
+                              job.result,
+                              label_text='Stories will not be added or updated in Calibre without confirmation.',
+                              offer_skip=True):
+
+            book_list = job.result
+            good_list = filter(lambda x : x['good'], book_list)
+            total_good = len(good_list)
+
+            self.gui.status_bar.show_message(_('Adding/Updating %s books.'%total_good))
+
+            if total_good > 0:
+                LoopProgressDialog(self.gui,
+                                   good_list,
+                                   partial(self._update_book, options=options, db=self.gui.current_db),
+                                   partial(self._update_books_completed, options=options),
+                                   init_label="Updating calibre for stories...",
+                                   win_title="Update calibre for stories",
+                                   status_prefix="Updated")
+
+    def _add_or_update_book(self,book,options,prefs,mi=None):
+        db = self.gui.current_db
+
+        if mi == None:
+            mi = self._make_mi_from_book(book)
+
+        book_id = book['calibre_id']
+        if book_id == None:
+            book_id = db.create_book_entry(mi,
+                                           add_duplicates=True)
+            book['calibre_id'] = book_id
+            book['added'] = True
+        else:
+            book['added'] = False
+
+        if not db.add_format_with_hooks(book_id,
+                                        options['fileform'],
+                                        book['outfile'], index_is_id=True):
+            book['comment'] = "Adding format to book failed for some reason..."
+            book['good']=False
+            book['icon']='dialog_error.png'
+
+        if prefs['deleteotherforms']:
+            fmts = db.formats(book['calibre_id'], index_is_id=True).split(',')
+            for fmt in fmts:
+                if fmt != formmapping[options['fileform']]:
+                    print("remove f:"+fmt)
+                    db.remove_format(book['calibre_id'], fmt, index_is_id=True)#, notify=False
+
+        if prefs['addtolists'] or prefs['addtoreadlists']:
+            self._update_reading_lists([book_id],add=True)
+
+        return book_id
+
+    def _update_metadata(self, db, book_id, book, mi, options):
+        if prefs['keeptags']:
+            old_tags = db.get_tags(book_id)
+            # remove old Completed/In-Progress only if there's a new one.
+            if 'Completed' in mi.tags or 'In-Progress' in mi.tags:
+                old_tags = filter( lambda x : x not in ('Completed', 'In-Progress'), old_tags)
+            # remove old Last Update tags if there are new ones.
+            if len(filter( lambda x : x.startswith("Last Update"), mi.tags)) > 0:
+                old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags)
+            # mi.tags needs to be list, but set kills dups.
+            mi.tags = list(set(list(old_tags)+mi.tags))
+
+        if 'langcode' in book['all_metadata']:
+            mi.languages=[book['all_metadata']['langcode']]
+        else:
+            # Set language English, but only if not already set.
+            oldmi = db.get_metadata(book_id,index_is_id=True)
+            if not oldmi.languages:
+                mi.languages=['eng']
+
+        if options['fileform'] == 'epub' and prefs['updatecover']:
+            existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
+            epubmi = get_metadata(existingepub,'EPUB')
+            if epubmi.cover_data[1] is not None:
+                db.set_cover(book_id, epubmi.cover_data[1])
+                #mi.cover = epubmi.cover_data[1]
+
+        # set author link if found. All current adapters have authorUrl.
+ if 'authorUrl' in book['all_metadata']: + autid=db.get_author_id(book['author']) + db.set_link_field_for_author(autid, unicode(book['all_metadata']['authorUrl']), + commit=False, notify=False) + + db.set_metadata(book_id,mi) + + # do configured column updates here. + #print("all_metadata: %s"%book['all_metadata']) + custom_columns = self.gui.library_view.model().custom_columns + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) + for col, meta in prefs['custom_cols'].iteritems(): + #print("setting %s to %s"%(col,meta)) + if col not in custom_columns: + print("%s not an existing column, skipping."%col) + continue + coldef = custom_columns[col] + if not meta.startswith('status-') and meta not in book['all_metadata'] or \ + meta.startswith('status-') and 'status' not in book['all_metadata']: + print("No value for %s, skipping."%meta) + continue + if meta not in permitted_values[coldef['datatype']]: + print("%s not a valid column type for %s, skipping."%(col,meta)) + continue + label = coldef['label'] + if coldef['datatype'] in ('enumeration','text','comments','datetime','series'): + db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False) + elif coldef['datatype'] in ('int','float'): + num = unicode(book['all_metadata'][meta]).replace(",","") + db.set_custom(book_id, num, label=label, commit=False) + elif coldef['datatype'] == 'bool' and meta.startswith('status-'): + if meta == 'status-C': + val = book['all_metadata']['status'] == 'Completed' + if meta == 'status-I': + val = book['all_metadata']['status'] == 'In-Progress' + db.set_custom(book_id, val, label=label, commit=False) + + db.commit() + + def _get_clean_reading_lists(self,lists): + if lists == None or lists.strip() == "" : + return [] + else: + return filter( lambda x : x, map( lambda x : x.strip(), lists.split(',') ) ) + + def _update_reading_lists(self,book_ids,add=True): + try: + rl_plugin = self.gui.iactions['Reading List'] + except: + if prefs['addtolists'] or prefs['addtoreadlists']: + message="
      <p>You configured FanFictionDownLoader to automatically update Reading Lists, but you don't have the Reading List plugin installed anymore?</p>
      " + confirm(message,'fanfictiondownloader_no_reading_list_plugin', self.gui) + return + + # XXX check for existence of lists, warning if not. + if prefs['addtoreadlists']: + if add: + addremovefunc = rl_plugin.add_books_to_list + else: + addremovefunc = rl_plugin.remove_books_from_list + + lists = self._get_clean_reading_lists(prefs['read_lists']) + if len(lists) < 1 : + message="
      <p>You configured FanFictionDownLoader to automatically update \"To Read\" Reading Lists, but you don't have any lists set?</p>
      " + confirm(message,'fanfictiondownloader_no_read_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("add good read l:(%s)"%l) + addremovefunc(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="
      <p>You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?</p>
      "%l + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + if prefs['addtolists'] and (add or (prefs['addtolistsonread'] and prefs['addtoreadlists']) ): + lists = self._get_clean_reading_lists(prefs['send_lists']) + if len(lists) < 1 : + message="
      <p>You configured FanFictionDownLoader to automatically update \"Send to Device\" Reading Lists, but you don't have any lists set?</p>
      " + confirm(message,'fanfictiondownloader_no_send_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("good send l:(%s)"%l) + rl_plugin.add_books_to_list(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="
      <p>You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?</p>
      "%l + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + def _find_existing_book_id(self,db,book,matchurl=True): + mi = MetaInformation(book["title"],(book["author"],)) # author is a list. + identicalbooks = db.find_identical_books(mi) + if matchurl: # only *really* identical if URL matches, too. + for ib in identicalbooks: + if self._get_story_url(db,ib) == book['url']: + return ib + if identicalbooks: + return identicalbooks.pop() + return None + + def _make_mi_from_book(self,book): + mi = MetaInformation(book['title'],(book['author'],)) # author is a list. + mi.set_identifiers({'url':book['url']}) + mi.publisher = book['publisher'] + mi.tags = book['tags'] + #mi.languages = ['en'] # handled in _update_metadata so it can check for existing lang. + mi.pubdate = book['pubdate'] + mi.timestamp = book['timestamp'] + mi.comments = book['comments'] + mi.series = book['series'] + return mi + + + def _convert_urls_to_books(self, urls): + books = [] + uniqueurls = set() + for url in urls: + book = self._convert_url_to_book(url) + if book['url'] in uniqueurls: + book['good'] = False + book['comment'] = "Same story already included." + uniqueurls.add(book['url']) + books.append(book) + return books + + def _convert_url_to_book(self, url): + book = {} + book['good'] = True + book['calibre_id'] = None + book['title'] = 'Unknown' + book['author'] = 'Unknown' + book['author_sort'] = 'Unknown' + + book['comment'] = '' + book['url'] = '' + book['added'] = False + + self._set_book_url_and_comment(book,url) + return book + + def _convert_id_to_book(self, idval, good=True): + book = {} + book['good'] = good + book['calibre_id'] = idval + book['title'] = 'Unknown' + book['author'] = 'Unknown' + book['author_sort'] = 'Unknown' + + book['comment'] = '' + book['url'] = '' + book['added'] = False + + return book + + def _populate_book_from_calibre_id(self, book, db=None): + mi = db.get_metadata(book['calibre_id'], index_is_id=True) + #book = {} + book['good'] = True + book['calibre_id'] = mi.id + book['title'] = mi.title + book['author'] = authors_to_string(mi.authors) + book['author_sort'] = mi.author_sort + book['comment'] = '' + book['url'] = "" + book['added'] = False + + url = self._get_story_url(db,book['calibre_id']) + self._set_book_url_and_comment(book,url) + #return book + + def _set_book_url_and_comment(self,book,url): + if not url: + book['comment'] = "No story URL found." + book['good'] = False + book['icon'] = 'search_delete_saved.png' + else: + # get normalized url or None. + book['url'] = self._is_good_downloader_url(url) + if book['url'] == None: + book['url'] = url + book['comment'] = "URL is not a valid story URL." + book['good'] = False + book['icon']='dialog_error.png' + + def _get_story_url(self, db, book_id): + identifiers = db.get_identifiers(book_id,index_is_id=True) + if 'url' in identifiers: + # identifiers have :->| in url. + #print("url from book:"+identifiers['url'].replace('|',':')) + return identifiers['url'].replace('|',':') + else: + ## only epub has URL in it--at least where I can easily find it. 
+            if db.has_format(book_id,'EPUB',index_is_id=True):
+                existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
+                mi = get_metadata(existingepub,'EPUB')
+                identifiers = mi.get_identifiers()
+                if 'url' in identifiers:
+                    #print("url from epub:"+identifiers['url'].replace('|',':'))
+                    return identifiers['url'].replace('|',':')
+                # look for dc:source
+                return get_dcsource(existingepub)
+        return None
+
+    def _is_good_downloader_url(self,url):
+        # this is the accepted way to 'check for existence'?  really?
+        try:
+            self.dummyconfig
+        except AttributeError:
+            self.dummyconfig = SafeConfigParser()
+        # pulling up an adapter is pretty low overhead.  If
+        # it fails, it's a bad url.
+        try:
+            adapter = adapters.getAdapter(self.dummyconfig,url)
+            url = adapter.url
+            del adapter
+            return url
+        except:
+            return None
+
+def get_url_list(urls):
+    def f(x):
+        if x.strip(): return True
+        else: return False
+    # set removes dups.
+    return set(filter(f,urls.strip().splitlines()))
+
diff --git a/calibre-plugin/images/icon.png b/calibre-plugin/images/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9715307dd4fe35c686b222262b796828856ff22
GIT binary patch
literal 24649
[24649 bytes of base85-encoded PNG data omitted]
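
For reference, get_url_list() in the plugin code above is its only module-level function; a minimal sketch of its behavior (the ffdl_plugin module name and the story URLs are illustrative, not part of the patch):

    from ffdl_plugin import get_url_list

    text = "http://test1.com?sid=1234\n\nhttp://test1.com?sid=1234\nhttp://test1.com?sid=5678"

    # Blank lines are dropped (the inner filter tests x.strip()) and exact
    # duplicates collapse because the result is built as a set, so the
    # order of the returned URLs is arbitrary:
    print(get_url_list(text))
    # -> set(['http://test1.com?sid=1234', 'http://test1.com?sid=5678'])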
zEebv~mcVa!+6#j8pTQ0IEW zdT5UZgk|8CygO&_;2N4E$EXZe0@lb8QsCo2*gR}zR-jG&od%qQJ(_t7Ja?D%sYnO(s zV8!Fmh7^}VXi(`{?-5k>PVbK5lyBhF=eqLDJFI^heRqiGhXqt9KJzNr9D~Qj?m2_Bh z&6f;oXyhi`vLHC;pwXo4a&B{i5+MWu^Z5K$(a_k$_CRS}$>|je0sHl*CE@b{-+cdB zcM%-MJDysI z5n$B%0~RNY^|@A&Q`q0{!T}p&xm*1SWy*DUGo21FyuXdh@ZwP*8pwGgj4``3Z-HYX zG8K_%%D5?*jm|k7cxvs>Rg(gK%NW}FPA2;X7g+9JAj}iB)6qFZF?-Yq$v8y@krD)o zo7WWc06tw36LUb!Sh#zwIg_?3Q91M;MAYJg$c4AGe$vPd-DmwXF>)RLC*F zDbT>lNkJHz+TdVyX?)@hk9Qec)X+kn)Mq3UJ3ZSSz&&2wPxp{Tu6#7*%ZuLRLY^oL zAj}eBW(UF!;8;SAg8*?RKarqE!(8t@NHUAK*PHdNslUjH`gsYmn6NSpDp8x z7uRLN&XAACp)VgclB4m5B!~NTB|DD@FL6(l!6lw|dcZHt$Z0%iq=!$mP(ARxA!Sac zk?hj*Q|WdFR9dhKXuN=u_kM9w!K>vd|2PIcaQKoqeHplRg1h(%@bvIDUC~KlrfWi= z)dm*BXJhN71Vg7ALXP8AtJkq!Ly+0R$=U|$uk}fO2hExFS3?jgf(FAqmIShGFeY6I zoh^@I#bpMCKb-(~E@fj3&dH{0ENo~4s41jHjtC{KfuLSZt6xn!UpyvOfZlwn%q082 zguXPn-1rN4a|FBABZ2{7w$fkV{+;zu*xk)k&3KuPQ^QuGUxZQjXTAhHbEeqis0?GK zFycV1u^u!bP!&3oZ_-Iam3HwZ#R>anYZGSwUf|(NaF zYrcGO6;@Zl!u}wCV2S~*pF!mI2C|SOF(hP)hdm@FcaLucPK(Zpl}`+E@tW?~L%*I>;Ns{m=+N_3vfLpRvtEkNPQZLa$zO7){VE}{4z7$Ti5e)2R&lBj|osNb}foI}IH zr_CB?8KzbYL_!1Rxm`0L@AVsNp^PaIZ8l0v;Q z@8R|;X5CaRf!*Fgbg`rNmkyom7U*}{F8(jDmUSohg9buU=6U*8E}90gkU>KJBr8fJ zebL3)QdKBaaFR=l448PJ@9jLxGz~1Er+43?lTe42Z@d z5s*l!rZx6Q_oeU{$VDb3=IPoBCM@11Twx&MmM)(~>x=8h3$>0ABT*YS9cWQ|lu{pw!u{an3GZ zrpT~p7+o~MFe?G<=n=YR8{r4l&R6gq!an#kJ!n2guU|mqc+{+Tuvo>|DqphLvlWL& zS!McYcFRYSw9^QQx{vz>;DAlLU}YlQ77N#vI8Wq|>K%?kNecK4VEObM;R?wrcEm&r z+&HcO{5K|KnfYD261lJ9z>R0@AaCB$KkFG4REym$8x}dCiEhA|`zt-lm?mvtUDmCS zM1IW~ZQX7IdZ%g9By~@tpD(UnN05#+k1Ogt2zS4W12>OZKF|3KiJL??>M~g8CRk`c zQB(v07M7XA(f6T@*RW#S*@=aF;{_wJ&GclN1FZxv5q%r-jsU5zhAJb*f*a>WJI2qk z7A__~G#or(fhe*Lei}V9eC~D-I~(+5fyol%ureu1E|koq$HbWAsll(*13j!GJKHm4 zkfgzcs;?Imd$aD#_3hDF7+^!DYECON zxA+oqQI`=3xIk~71w%|y>z;4>o0Mz`+KHsg2hW_o;9{1>)RhGJPW$UO zv6X-|VW5LvV>eG!{63=+yEMjB(5_heD)$T|g{Ep;c&f|pq@43zE2_!<$_D5*))%w= zx{lQzEe^6Lw^1PW2dWV8;C=(isfXH2i~?Tgvnp&{AzIDXLFf#8bSjR#_%J2i-)7C; zBe1AFm0zGX>;4BC$_k!0SkHc3h%s-eE-r`IELf7aTa;?-sUXqzkXZtO(&(isNP-Ly zd08#<9=X4l%|6MWQZ~*gVC+&zu;P4;i(Yy}C`J4l|5w^!ouiR+ByoO|RlE;1mx(Y* zxxf@pW9$zBnR(FU?A7NPL1ovtm0nt29R0#-j|q=O&Sn@~RHAOoK8yyx1G`067X%?Z zYJ8kSMBh!soIZ6WmT&K)?;*c{BIp8r@Ytpd2|Lry<)$f_8V&=NX^xLRjduVLDW;4I z3fR)>%SVe-9|_-W%`VTB{(gGfjZ4%`POM|6Fw0F8g$K$D7!LvD*%htOKbwajgV-|_ zqRVv>p&=LaQhJ@k(9tcJ$fgC9Sa`&Wh$Y}7wFFEptz!x=gIgvhvlNR9{iPWe3=9W5k8#&Ksm!2eipJrpPOuNYdjTkcA)=J;Uu)q zjfSP7<4c;$3*;<2>jNQ0lCVp~FTTQ~!HN6UqWzLR)4MR~Qp!fG)f0se{)I(eh}IoQ z4O790SBQo$vUC?w$x9vhIT!>W_zCz4{3L`)kCd3E%#DDUsfT0|@JXY$5izbSlQa** z2G7E5gV?jX^}gQA??4C-J9fONo;@^5@_mbRYtfTbQr!?JcbPa{Zb@5=<7uB&jQE9@ zrXu;?o(=+39BwVk>;x9P$E|0p++Cj@+**vGy{iP>^yO$6gc&cLcmQJ<#^H^h09dD* z`o?%2Z17QEK{74~me84P%xfVsxtWQQ96G^nhP6bis*g!@7d_^NjZSzPm|*|v`GXfE zYpxuFM8BMCiiGp`>k=TT_XnzUU)2Hu1QZ2JC*c###wW9d^hV5q0LuFl8F5r!SWX_O ziHMnOf#7mRL(M{=dkEdJIwM)k?CxV8EWoEbcvcC!?PM*$i~o5hV?*O0L0-774trKN z4Mf^_#8E%hc>Z5z%7uTTurjZM@NC*@lmB)-&@;+;oix2tW>BKrwtc-6?gDi!4l%_#nfcV+<}!~<=_(a+=1UzD;QpoY1kGEUFYCx#Wo zfm{7JbpE-8mAG{~!Z;y<13tccbY$`boD~Iq?;h5Wx=Znd*68OwN@h#d-#lwt_H(Wy z@s8Hd+Sd%Kh#ag@A{@RBn|49Awd$M&ncB%|9{Bi)6q0WB}@=@{bm`?k%N zwoBgHrT1QNAkKPjbac~3zFl2!m0w6~&Xs|!MZ9?C@q)B~hVWzGF9^XZVQ6|J};%6GN)w$+p%eJXnnv)i42LHEQ@R5Aum+Y~|M%4f6;I{ftqb>)^R0v@9D z=S)o^=A~QTk9L3fd)fO*32j#IPGjY=4sBdUVfO)HVzyGaq$Az%vnW%hJ|}6|-!l&X zv-1w$+oLbNg_DJ%l=-}k{bf(ah}c{V*2O1$ksd|C!JfmQR)4I#gDT!diL$GbcXKb% zyZV%hC756hkEZLOgQyot=!r3qqu9@v~DpHxW=(}E+s%h0&0mwKzwi$66(H~rMbXfgHbrA4!f#n#C5Iid^9m|b*7gX&Wh zX-yoOE0r_9ZW49y`y~?!{VS%GTGI-$8#K!f8AzFmjh6sZ1evh!3@ML*nUJs~`BJ z3g>bq;UEeI51YjvVmTu6k$~R-FAmw7a1sYiN5p!dDze<%GNBJX9KFPRm;yqVjgu`f 
z4qgA`sEl;lI4JL^uG)%uzW!M{;r7~qxg@W^QtQFpdE=}Kfztv>&D}yLx!Fjr@Rv@i zCVSBVU0|`jSMxU<#j8wN#`33z_VB(W?P3%aO9vd)#w=joIz|K)!`-=Fd-2n_wL|%+ zt{)bS*r!2PQiy^CA>pFCtG|a~s<7#w;w-IIFNxeStv-pvvPPiAM4MUKLvb^X&c#op zvH98{;O;Y94MY%>4?mng>BxT1Xo-Zrd;aEs%ZbuyyB+jDu*t&CSjSRViak9zY)U;( zCGna5y6(@D1{R~nv0BU`!q~*K;*52zD6zWSNlZpXABgzm()n&wBGKyA3$1o;b`WKh zZ9k;QD$U0*$IjMu=NCO)j0xL-8bMr?Pw8JJz8=;-mDep3b*X`X1Js3pszIbO{UJ-< zIY@&{J1C+JqhLuLNCmJd^;Mhu;xSm0#zkXdh)|~?IwB>)EGoMY%UY&B>SX%u%8U{V zBd@sCbrW8C8B_*Qa4k?*YA};))AMjmtEA@KvfZv)q}uvawW=*UN5cHUUYG7FnYmA! zKvcjyno+0kots6*RNnx&Zx}a_CP0H2(FUvfzjE{@GqDZK$SM%=j;LjfBG|P-+@Jbg zP@;5ttLD;yu$T~&?Z}esGVJIeqeA>2F~)sQDfc_vFw|X>_s$x9MLFcTpu6|teiF_P zPv}PEOw_Mg?A8j=xsBnwz<4Bpetngxx}w}G-GneaUQ&~^dzhmi{C14ESY>do5l{qB zyD{yGpVaOP``2+(V(0i*l2;7|1_0S<_lM~Yr8baECf|IS$BOA$(2X5Z*(%O;jGU`^ zAn6#wFz+s?yM9uTFq#%#q+w+(Hdw@MO*(&f;g7`|2B0?-v3kTol*R}s8Z1K79p z<%uuHzU3WbkkvP0U1VKM^_oJOr*6$@8LWS%Zgq+(F-qBSpj(0Eb9^=ml7Q81wUKH= zvX;HkXOy3cL>@_JkQ-Vgp+KN;HvX#OgpsQwsK%1*hMCtfdX8=WRU4sNmuHx4B=ET9 zg1@?QA(tUuI~`J9MNV7D%M~MPL+1x8cSG4G|FUiQa zxeMLWHCJe8T;!%vG6jRCt4QsESu|%^NP%cN!RYy%d|ebHx#KVg+8l|(hr=dn(bbmH;*9CK$OioF; zQe+#37`!@F=O;MUZ^zIPgjo6mLu1gg`>|?f+ddBSXuy^-GNZRkV| zOw*P!tJ!7?-4yMZmniaK7^m${k9vekO*b=5bswV!143eviqe*?zKSu88sAUl$ent| zV56y@tn+m0ILgD{cx_ZN6zMA;Utp1Imw^yN`o$>>EyhxC!IHJp&Y}JyV)6&mijeBmx^BIDvfEybe;!bPY z-gFTD6dN(PE`yRNT}d>S+0YmKo+#cAGp`i1m8?#MNh_)S%JR#6>`Wx1QNT~SbUzz{ zCL7SSxyeVqTCcz}A?8_7{I!QpjuxS6Ob!~XMPKG7UxDD~UUY$22zE7qhNZp1!eqaU zKd)oZYJ-Rv{5mt-I5$YOuuw`rXILP@TE|gsY4vXd7u`~}{o?s4Uj4UzyuiO5jS#JO z#HVMUkc;_fD!_wEW+D z3Q4TKsqYEQ)F^p1GSf*-eExrTSc(x}*0Z;;k_kYq|5+mbf#H0o3*J8!Gdy~GHDlw{ z;+`tDL{>r0PUkHn=n9kR!Rgw^`=bJh$)5&mO*Nm|N2KRKbbr_Ya)xLkv*wn0mB9`Q z$4`$Rk8qGLx%t7Q=^LbuAuY?E$Zj`-hQ-Fn@ag0L0i_T-RbAJ(7t%g*QyXY8&_0=j z4L6W=s+D1w=s5BdH^ApT)pl;D_NL&bq^=__CQM)e+eRi^Fr< zatS!(H#K^jzUtCDK*>8N28u!Rr!k;%UlbLyooM~rNVn+PXv4O(7sx>Wo$&99IwNx2 zqb^3%ybqqSuN$jkaN3sxI8l{cm{x?l_Y9g>V1WLrg^%O1U8<_tKT1B{4_!f7YhU7< z8=9M7xNDA?s8a0^60{iXz>h6dk1$ihbGrVwa%_%zj0{=1x0-XiuU8@@PA4Z5x`3C? z8>xxAYaS_s7q-Ym#EwY#O~A_=!Eu^;(RtMn573t7qq6*$Z=9Shn|4WtT= zhlr?SRghk8mNx560&)p@7sXT4qfdLg z34CW(Mj{oFmN^~TomVFOr94h-)JvtMxr)(U3!kjuY!7mQPZp~-0A0dYLV1yN-NI+f z(KZcj9Bl(&q`KVT9cv%vzGwRG^0VSj9K`zgW-=0rEu8io?wYS&AZK!UM}j=urqb-x zm|&8@*_I^0SXX^OQROip4aniZo&4AH4}s$HTz?ks%w0nV+oc`n!Ro~Ww?k{|;_tke z)l2gePULA`o7b`5!>uW^Tk>{ZI9PM9SxlOl^cu?Hdos{G7hjEh&;+fN7EY@mH%AY7 zy=kaHxw=Z8F?vV`Bg03&U8p$P2&@uXl$@>qjuLh;_udr`RNR(d+33m%)0V~i$&P|C zf42`J)0~pX&;R3QWn-MVLv?vx?w}45LX3ed%Ha=0R(b;j)o~0tO9f`R68luZ$SI>SdL^~QXK7%klcHDf}v$T`?*y&;Vxp3YR+C2DM0W?NQAs+Sj z43_~(GIOA=P5s05M%v;PYdZ<&iRuR``5_U_e+ZPiilI!se z_a6$$H}CJG;TfwiWF`;jgQ+<>zjJfGU&N~IZM!zkXy3On<0YP!`Dw=`;9!UY*<9Se zys@BEic9l&hZf_rp`@UKor<)W7?-KZ9S!AJzci2gp0Y61@YQ`~{1OkN+ z%YotBQn>SDPG>|{=Q}AGao=;XSF5gFszSn|-@?e&&7Fy1v8Kn%PTUXXN*4LmXmkg> zCgw^$`f4mhwZF-eFQ9}Oe;pkYuFsAp4z9MRRLn&~Ea8Zx8$2~!MQ-;V|JE}`+b8C? 
z`pV_Z_xx7;EX{HkL7zdUP#MTXcwWX>U55|_p?HFsy@4?m)>(fakP~8(sB2hfCKgVs zdp0uDtO5$P!D~tA{qq{{o2&4J%K5viq#w)8r#1!biuyz4a}M!vLqla(6=VcV)e;pI zRI4tk2na4ChDs_bC)aiVpE%D}FkkK}-_n zSAF$qF8wIkOjoWbwgsnoR^ENX=bB{PXXeodrTf6aN<@Wk#0Vax4&1 zEWSpev;B@G(z+W9_qs3QJ_V-KYeH1OEEDQhjz76T`IgGd{SqlqbL+nLwk| zMUPdjjJ#PK@Q0Fxoe3CaF5P7fHIFWj_nDRjG&=VAizn=xXJ^nB4avYmcmXXlBbhAH zv?vBBZM_LUW&KXke%+s}j&bI1iETVpE{BMlTHeDR7Cm{!2DrHUF(RLi?gBM2b^epzU<3vRMoSWqPLI>XlUqt$8xXZR!7KcDpw+<()i@1r3e;M6Z; zNh58X9S!3|D_JB*?HY{yXtg^u z4btY522u*H^0bP;x($+Vq4QKj-e1Y8DKc2a`@5aNvW=hJN^hK!G>z15JIM5LU#UF$ z5`aoyz^mY$eSgkmf77vNh?fZcEpQyC2a^(FQR2shJNI!c#2voMAUl4~jKOMMw^0}z zWQ~eYi>xJ51Y{PQ2rHzkkzOum^Lp1F!@^<`Su{`s_fAZf(##{1N)3+DrpkJoMv1}7JN7D)8 zfP{D9<4LM((jzKnzNu>TJ?njU*{;x|cP#{0AIiweLf7GPA51$!^~Yieu)t;M`kLXz z+9(4Zo-FJFh(4Ub+8r$&m$WNd$kJu9NdmBe!Ao}4<|d34V@EJ@6Q{YP)k{HZYi5R3 zF|u;NM=8@m2V<1>VJMz2M~<+xW42c{;r6uNk%Ma|KM6Dd*u+5`S$kN@@re`25L z?txqpjJe5FS?Pbq$Rice29Cf|jfkQWSv!84Xib7trUddTeowz8$|NKiNIPw1>pX|O zS@(vcb`k(?Z?cw#sGx9_67|%^E77rw{4tH>lPeKL&Sdx=yQCaAMv7((hQYGj1S8>F zOIo{Q#2S{`lX zseX~{(C-ynP=DwpC7`g`FT%GNBvU!z=VfYJ6t7s2TP9ldMOju}D|UED{Fdfvb|XGS zWjaQ8tX0)?{}Z+`F}~I+#3TUiSAc{1DnWNm&13S9Ltu|wgC1L`liS$;KbQjiQ#s!MFa(ve?ZaRq2~JABfV0Xku69J z$|O-$sBpPp_)@uaF*FZ){fj8B)|b#~dmErd*nry^8nEmapGWc;hzu5Z6UM7qebTG;> z>Y!l5GI?84N8NRZ%-efbDH&|?ULcRT5X-B&bY{6Hsxbsw1}Xs&VF-a^GC5>DGC^FW zT-+nAFVg(sG&8IvCYs1rEW?Z<3F?^P1rJB|D0H;z%0a>)FX1e_&5!;GwV zYH>ZoTDj@3z>HgFEgd#vem1HZSi1<@`9fQ_8g5st;(RQ?~~gShU`v-#FM0*eaoR&TDS%G3`=CDk=Iv=Hajtvb@uh3_(EXQYQ}#Gja}af zfkrSF-+F5ja&1}B54}3L$Ccca+LS2&Bh9d zJnfpRJf}T9^+J**w`mz8l?r3zdt8AQN?qN0l0Fxe$gfR#0}j4V8RB5}92hHPH&@{| zQvkjIQ2&dYT0p;1PIg~$(LSqMxZwpNU2xH`hFzojG`|3Tj{N7;wTY^gU%Mr)dglnK z(WgPpc{DzdG6}ny`C`hISjjxM*capBLTxsjb=24Ca2GrPAxZJr;Us1-V~&|u5O6MC zKCVZ&b_>VMxEZUc-#uhIx%Q1hcd9QjT1xem_je)uqhkw#dL)oXy!)E=^S}L6&Y-c~ z7QQ009BS0QRXnTp3a7xrb^-OfW$yJa^O2r0_a;VlT%Q23fF@GV2HI4c{t_5$mV2&a zpfiP$K{BN86H?5ZGgemfM5z_I;jqr+K~#JCmwgLP?j!s#`=-q9B=L_6uKdc>_a$=1 zG5p+TE8I~$F?ackFP|+j?XLXDpR)D4v^pqJ-2`8s&;^k;O)RseS8x3hjZa{Lxzi(& zGe!l+_2Q6hb20V|oJvEfT|Ce8&~qPmzx@T2*g!2MwQM{;Av_#icJzmKB5s-i-ZaZ) zz?t1XxC0x&dwAMAA}|Lz>JbX?YT(dgs3To)dO|dd(M}k|_vn4xCW>oaA&j?Y8m%gQfP>czN44+oR}!o|THH!yJy+=x8xurzVDrw!Jf zZjLY!-!R6 zcE|!IF&4(N$*a(KJNMRPw>CDqEBG$v;Qi}qvQvV3aB}@80Zu43G7CaI)aA5F29lkx zu|fm&jwgXx7#3&R4%K)7|jh7Pr; z^>!0HfRLDY@osq}n9iDXsI(z8jcS$kZ2-R>Yn^i%4nUf$#$-z+!+_yY04HVeeAph; z&fnqgQn`oN;DtgT*Z-GWYc160p#L+k_y$FO=$`rqHM3%ti3f1zKVaRicZPA%vL*D= z=aKMW#B<&olkeH*2aAdn`)6-QSXc-i;7?|yONj%n@!njv!uj~wP1id_!k^<2f|O!W zjBn^a!A5^a5vud&7y97`>+j|VYZc^Gdao3PWIS6x0rBuR)AWOrs;jxLWBImfbiVSW zmVuzifV&rFxef_xRu%5~<*=1PEqM{3(DQIr`2A&XNzQ8`lCk&ErgdD80vXW<> zg&ub33r~O{oN?@gZ{I$PEj6q%3?LHT3Gb|@*l95$!zU5~D4eXu048i|>KVaa2mlWN zF9bj)@tbetPweRUnp%!nYj_0^_7E3@fU6QQe<^XR-PwpBcqAC~rck2RFd^+!Y&nugcEv0f^E*0SXX|>kAaKP@O>6=ZytYhBC^^qo1)D%4p z*71-&ZF^<>RM{B8xB*E)!4)z72*o0;5Ehbdh94y1zuY z5aebM;3B(5<)y#qQ$Qg|6)ed=YI4^s#p64k-Fcoi?Ij1XY}A{r*ug#_wmJl7yB0<{3BN3c8@-RcU&sEPH-d0xTauJO zDFBnMro3T(CqezzW5j81Y(I4%L?z;NKe$~my8k!THLXw>$Yp-~NWzfm8kq{`f^Cmv z8EE)wmGgQwB>c8$^aBz^1JDP{^{g3eRZ|))bH)O57J)aFc*Dj{@9NT?HisH1eBFB?SQoOScO^4<~ldMWv5wU@0oXp&;-+Pix zH)5Cy*5h{6JR!UYfH@~Adlw(pQ`R+B3AJ0KkP%V!{q?x9^JY@cu)%C)Ukf#{Z7nq4SjHlG7-31p?ZvHM?2vW#z*bZuAjYfw@#N~eHH#7)&&ry6!}Aeh2FvRr_u)n@c| z`f;qsv*x^`&}h);v(R23CJ4qv$;i^>e$NKL#rpW`e|%l=fuz4FMU=d1b}K7 z5TA7V$84k}5Rj%DXR{ppoX*qXaQNi<-*$CC6* zIpiqoP)Mp4?6CXms}FZH>RpPrrR9hZ;Dyy?-^ThL50-74|7w7(J^= zYSU6y7s?^~ml?}ft+g7bUAc{?OFSTQnA=Ar1DAXIZ(?syr&&J z5@?YT3J+RqJR|XYIZEIM+(4zu2szOf8-3iikXpoXt)>1TS*|@`BGdZ%3Bx8}gD(DlpUmX5nn@ 
zdyhEJ99aAx53?QkQBO7+k7LGmel(&cmXA?D^~D9e<*#h${nVlbMnlj5In(6Unl-dK zdqx#J*2^rcA|rD0i&Y%dmo!d5C_{}mLk0lD*?ukAZCDP z`@KVz-*a{a;ge*jPuANTMns<}q%<*2wA9GGGCmN6{*S&;-Gecur-g83Y2oAW%C}hm z{jLEiKoo@{OYMi9?bkyMmUEk*eM15%0$8A&s~a0V?-v&rL#C!w>VM>u^5{2Ged=eG z$0{$?XmsA9T}(YQUPvlzuDx|y=cy^#Z0;*1wU^2?+!eEY3xiHQ*3>lR!udrRO`+Jo zj|>0i+BK zkDLYDS0BeUu9s7AJ^*x?-!ys$4bVCQ$J}F##VII4af3Il4FGEgPo` zj_q$*JPsL@-7}+$NSoy!lLvi&SBO>UOFYW_(rz;#HPaUX*Sc$5Kr>A{KGsSG| z?16GA%%3Cjkupv2x@#E)JWm+=B|~u5(%3$C4-=95GuM!hFxk|r-Zv9YR#?Aga!1|W zo}?3<3j3Nqc>|Ql9Q8ie`H4f0di-Vszzn!FfHBSjXs?eD49TqD1RNTYkylcph#>!I z62^*@S0A#&>veTuv+6f;dkc(${5dyL1EZ4_j6!eF6VXbjenQ|b$I)DOn?%0qVg0AHr^}*NEr=Vag@ytel*Q&D-A*XRZYVE=TEp`r$86 zM1KuQ;;R3byu6Rt{x*Vows@nJ<6}>ZaH5J9yko#ApC%9Z%ckm3T?h;?5H+JD zO1d84;@~J(XdWA<>82gMK0ba&LPqA&Nmaq>s%0eNGdM#^i zt+4)m?5D=7*XtxcY(hTNt;%8cD0k=-;=n_iG396RABsWmeJp$ZqX3B(-b-bq1d?YO zC4)~dAHm92!OANxhU-r)y2)ona&cWA5akmoIsAL;npC@m^}JI!2{&*I5%$ke*P!X= z@Tz<0nOi9HBYjL327EsrFr9{uxW)1Mw}p=IwC=%7BxN5sWa~?TGRri}E`E@L#Z;(T zGTtH&p-Z2{DkvBYw)cmQ4RZ_n!i$?B9iml(kpcAv#70)Mo|;9}6(%k>MmS>2prz}H zIV~#h0P^iDN=|eY`?2)-Dxwq5_E-J^IRA^2G^ntd$lrge!O~g>rtKR(b;M?8XD7$G z4!E%L@Pu}Cea#g5(K5v4Ndz}A0oSxMiI=$zSQQm!ES|(Z*=NZX!BTI5HaMp7LY^7Y)`9B9RWk4W(R%E&A ztw#IAx)2KQ5)kHQf@e4?R)?N=w2^EPkY=KzX^UKR5jgNDC>*k%toQ zI3HlP4wB1e-g9kcZm0FP)4|CW@b!+H&B8h2mo&S3iNo!CtwjDb%W_+yG}Oi3viBqo zoVs1wqi>PJzLn1z~}B{h!yJ*@nVFHENIvwmG%(ep4Ai6N?|>4%%E%!W4Ct2 z6%jC)T9jB6fAB}_nH%}X@$-Nq^aYf(iV3DmX)z`(m63Vz-M!V_FTt3pf}VKSDT-Z2O<{vZ*8dpB9lka~F0xf2 zB@aC%I|S9+=nO74Fdo|L*L^RTBP$iUdALLXhGH5MNLP{8fQbl#ti=F($crG`dT*r; zXJQfVRt9=lk!Ei4$>9HCEL=}=_UEe8Tg+RT zT^w8t;j6#=u>cnS$SvyWCXR%)B_KWX^@(=tVHybxCucn_dZm}MSb{tS&+%xJzA2%g zl9Ny#SPHlAe%0`cO-?3hDG;36PLsHAmkv6!d-CQ)T+TrlNA3fwO#j;KL4Zj~eEt?OR_8lF# z`tf60N4UcB4f-qa?d`1`UstFd9*`~IU9uDnyNUHQ#nA>c{7nz!n3gMcaNOv5q`(cM zivBM*mAS!!0wIVS;I#hU5FNDB^BU|2Sou6dS@nHr$VfO{mT{ESrh?%s8kU-~clN?J z$L1a0YKK+}@2OdR7~BI<`&CwiDo%LtQM`z%ijcHqBR(_WD)e~OSJu|Cw+v6QhBaA} z82Ct}UIYM;)_nY5028P+tV5el{{jQ8_qG3Uki4#O@gngNJpTC`ZAtj;T{;xgauC;|eynyEN+i`V}hS>=xZ) zOmxOVG;W{5g$#aj zVPQf2-xwa$g2!b~u~L4?VE?SjPd|<)Ts}%Kj%CmAL)`Cb_Q}v}us|t>+5NKYH@9 z0@7T1?xw~XEN%<9os0xHz_%u*RbcU^MCwW2NsV?JnV~-^AqT0;qO?`-;YylwIdQ7(hB3DBJAMH1iwHbJN`NCYi|dy|Tsx@Qh)s z`|D$3e^|O8(R<4l3)wVlZPK9o3)pqt)*>+Ko|aX$d9!qc#EGjU*e_2A>3>F6jP3V4`5=84Je;7iv|1Fu{)D$ziN<^tE&2yz?u zb@&1m=>@?i+Ey0(YEfWM$mahBYiztVjsJj{(gID~2$OY$kSte73DK}a2LDTWNJuv#IY);GgHh7;j_>c?U!TA3J-hcg&pqck=U#s3ZJ|c4dq9#408T~< zzgu)e1xZvUW)cM*kYb(Cv2K?`6F;T9d&I^B2t14Vbp(Ch4H*Imbnj1_rZ?POOpGHF zDKOOVB~_OQdN-msFFYqEDKafXVhO?>xVVv_oT7D*y|$a~34Z+Bo1$XZC~{B5 zJ3|di#7EdhT-)u{Rtg5`IS@K0y-(4h4y9cLSURkvsQ-sFM%4nEOQ~Of;m9z%0@DTK z_XRpUOMTpbTB0G@V9nY=Oba58hvbVFLWP3zqY_N+4_jz{L+yK2&L0K8pZaK#X?=|K zmJhj?l)$dtbr zcR#P6qw$Pd#U`t-C-K=mi`F$&_pz??2`n9ZgkMc3s;PGi|H1TtfM2w!P3vbJ+68HJ zO1cn#-GwKQY2tjhPPbDGtpdI#pkI5{mt%IU(%W|%neurQKTad>Ecp}a52YyyOcgz< zDC0PtYHSBzGG@C^U~9x%c?5e|Z^^(jji3L7h$Y;v#YFz>xCO9fe^Iyu_8Cy2&9whn z*3tnA;iJ-p7#r6Vy+J?q%-$aYU%E9xJ;^%mm5%U(Zo52;nhw?% zgf`|6ybbO7`e0inDqIMu5wXieBS|FH?h0Lz%g=e7;??y}(C_k5?4`S)pE`*k<@mW+{qS z-1t8{Nx$2r3Muov{5C%5;yE;tPZM9X@GHMFQBG_k_oC&!!OX}xVillRYPfPablrwG zJTltgc_YKeogVH35B^>54vYH|wH)N;Ze8fW5n7CkZ2yZyoKh%5*vm{8Bok2O{^K1T z3J(P)MBTL+TE8E>Z(QblHbWv)rSpCx=1~-;@uxIXI;74W7JcF4(xMcwaE##x5PK)C zeFeSrzg6_edqpN1s1xxN76c`V4eQtzX$3VOJFlmRiD^N2knhcXbfhaQ)fY^p8ue;E zYQAHjrR^@t%tTCExZEV5e^lENRr+nqNWfJt0&*teezldnZA=%Ma0p zbND&#e!8=EQ9pQ(PZJc`ATgh`rsrOn44|)0=1~JvJ9dkCuA?VIF#^t0z+}}s`i@Vx z2?B!Vh4bPg4jWM91sm-#%cuJpbaZ#V^*?<{s!_(yDuQ(-Fs6aXJ!Rp7qFK+W{ zUKL%}vAP|xE@2a&LeW@gTC@u&Gh$!+QZzGg)GD^^09BF(A`d<>OqsmbY&`dv`!CN 
z&K&n7Zuo>hJJB~I3NX^ePTrsY?y_RNi98z->aFIx6^AkSZjc#X_}9QQ7wK8Nl0qcr${>x!b?5lZ0 z{i{s8r?PUsCE|GhGPe9d%-?IKKg(+mWw>KI`%F{IM3&6fgmc$HRX2H=hky*_RrgX<{)uoeep9t-Ks86NO1Oh3bzd; zUM|u{2fMT-E*NHKLJL<4Vn>dBIK_^+ndpMY7Td47y++8%_uxwIAS z`D5Q!Ti+Wrwzw%wMxgns26KEjcd?$vxwN-we~m;qhJjBYl-G-+j=C{#ed#;_ld-&} zEh#_|J|V6tH0IJu#e)n_D|Rv-r^UR%JodbXs(@(-2)B-YtKTFVgdnv_RQ68JlV3z~ z3Ku1WFkG%_-wm$%gh&dMNk>7EGU(l7IIR*JCCv;UF>r8H{74@-=250Ughsw3?=abfOI#t!@oy>4d>}8LX;?sE{gb zMCn*4Sx{wD@0Ea@ z`oDDI5E^a140EFh=sKt#+Lq=8*to5zM9iKY{4n_0y-G^-Exfe+dC{eFNTwQ>E>$E> zo%oEq&B5ajcK+L?z<}c7z1@Shbb3R0K)gRYk!{J1&XkZ4H2RRm{> zeCwMG1SbCx5%Xfenb)u{Sc@+;f67Idw>|4fy-T<{pQhgVs3>mdV)GJ}Ou2OL|CfdJ z5zlg!1LA#>3`3S12>vK)oKS69ls%cmW9TunoOVu*t*ckOufVe+J%1&vXk>XQan>Fw ze=eiyvcn*r^+7Ec?msnBGo5%FHTDUMBRw*j5kAlVRZn;_5VveDH$KMB=8w}7gqv&~ z(Z`nS@V3)i7EjPo5%Lj#4=Ur~Be21h;u^>AIR9`Me&&6tYN^KP9|Ln^j%F-$&$!y+ z>Jp5+ik;PEH_P+eufCjI~XM(=o_mlylNa?Sb)MV>WNJQuSP;a z?&-Qw(JH6^Qq{k|yM{O)Nsm6&I`+KIk=!2dA)?DM-Peo{8 zV6s@={xeSMiC>l(^xn*oDo09%(Bt-$U{9`WLMpW)+39#bU~w7?oSrR{?#sfl@#|68 z{;wCY>+~_(EDobVG04U2r9l<>5+@@%)zVZ`18&Ym>s#tnfi8&>hi0Fe3V2Fs7#MuI zJJ54XW$hsj%gz|Y^3o5TN%s*K1O&0dokYDaCDY~4GPV_H40L(Y;)rh^8(MmLQYjhm z>{iRsixhOH5^&TO@c4%v1%|$%U^;X^=8G*Ck%qw+uYM$EN_tH*LtSXi+W*~sP)1gy z7pgxgLt0sTil$cy>q>T$!m(~wrPt-tHuVnX=uqBi&aP^*=#6C62<3u(GvkE({r!2c z{AF)UYs=7t!|bq|UZtTF?0|y@xc=)$m4&@1LASLC!wfEo^@Pr_5sfQGO)kOmV{EAS z4D6X@mq%4Vyzn71rbny%78kN@7X|GYfF8rjkd32ngraC9;!$IQ7>(GZe&wdtQo8$- z(5S$cmpD*CBl}v@LO!al)S-7qg_2vlc)LHcp&MkddJI{`FgMsP%5_4VP97G9>zc+F zPGUUZrfLm?_t(_uf~lsBjD#yV2nq@ciTbKdf!c7x#((4D>gw7x)-Oj}_o=V{xn0ND zipD!-K|&_Ia#cCP5Ex^4Bm(o{0~2@h@aJ;+*nFN6?x=<43&(dfs>{gy^8U3%94&r< z)1KG^4m)I)$(j0K&7B8(tPSWLiG~qqi|=+N{Lkc?6JHuIvXiFRDMhs#_4_xtn)G=P z4q#$yafleL@ti?0fDmt8}t-&*U`+&WHOsdt2jIw5@_wBG&P7r<$xp zisrTn!s&I%otKiDH_r^9;JFq4jc+n?OCJTP$&6e9 zE@8&b8I^jG7UlunlasOyYJ@~YeZD7e+WRDe_arikQLxxnA-#}S4Azn{#)$WdY-`i- z%;sxSAE>$2-&xxAsEt2rM**R4(h++2ueL~Deh6s6zvTRU4E3JIv-%YC7gsAf%hFU@ zPu;vkbmV~FMMb%$njTtoc68j2E!Pv+ZikEIKkC-&>t~R8Y43=;N-WZT8WGq2z4x+{ zZQM9)rtOsadadt9K_K~Wg%S{VpS}f@!j{STG~$&(ntj)w(UGCK?+2_tZsx`MvmALE zsr0mV%LC=Ld!ASRt;^7*UA)K6=^T>(9gW?MNbq&mAIn`5w`L~bg zrX8IbFRfg;k=9KoaNudehXnlUdV|>`rLX160#6kD9mBTwc=g$T{Ps^6LKZiA>~1tB z4E3MB3B1Y<9zM~^!M={(2HD@$$tp%oZE}?3)BM4DtWHogd0g4hfS4Y{al_I`&H_X0 z&3+85Z>WvH)4z((nR-vO5}EaA|C>GKG|LP8dJFoX7_VW;>%8@Fe5#dMDC0wgiMlpZ zO~>-VbNGn+^wvdT3D?+?q|lJMI)Fi5F2RcSqk7Rb;d*QswRS2U1GkLyOYjqoFUI!v zfAHil&cZ6W3 z?v$tH?(uA!4~f)vi^|J$yH#dpB`9!V%K@>4{6Zf@S@Q7r8(fOG;@`%fE9nI!j0_Aa z@R09{O&v}U@x3J5{PIvdODJ(;<7}_}@okOFsz>Ugw2m%PkNYIyG& za}A2<`ZsA%j4-$L-x7_i1eg5-c(wE>e;J*aI{IT+Q-^YF!WuybXOz&eH_);fR#7{JbFf?%aW>tO8B0PcF5~h zKbs|bjd`C%v7)m3DXTPfskZ&&%ig4CG<(NHPe{1SCi;qY8Gh+jYwJucBgW+& znaC^Qi{jM-Ns`(jywj{u4=VCstVcdOy4TbMuDL%NtFq491thOWk?^1e62n72Pa00A)EZx*|C}sJF#|Ut^l$Ylzpp? 
zjTNjZb8*n^Nfs2bVB5Ul>97}fDnYQ<#Hs)Up-Aw)QHK?hwInv$vR3LFubyJkH$sEA z1HTe!X&Fo*7&*GM#26al&NXVf)-rYD+Hbz@y1T3}gtI&RWTPMYiZN^RPcxNpvzl=g z?dm&QIdc;h7Jn{I&q)(U8Lupp^Z~c)Al_*alrOmrE%%|(DfLBJw870ahvn*vPE072(pp^Bg{q z!cq``_yvy4m0Jfsxz8l_7vJ4%))LL^?k9VDaJeh?#49!UA~D#`%Zrdd&;DSpCGZFx z{2Q+TP-JIf3|9g(?KbvShf|rj;tfPWPEw!D2<=&((;z0eGMNf>%J;;L?Dv1SI+y9A zTFC{x0?oi# z!v5|Kt1e;HB0Y}OmkfPh^!cpNPf;HSn)b!nxkrW1B2R=hY6(=t8if*k!8mHl{dBLZ z<6FgOhSdJ$U5}Hd>wzji&zq-B^(D4MDvk{d@H{y?Xi!akTU#5-G5PD0oQtDk?#}H% zz%#dOVN-=~bZ>91Ng;n#Ugh7{noB10Qb|{pD;1#6N6pa3nx>YlRU$$bu2-g5rr&u8 z=HlX#488{2*w`?ZIGPw5y?4#B-|(@r1UdpiW#xQ5m0%>od<|PUBrrUZq*{!7?QD8R z!tbANkLl&nl=XbWp?BPw+I;5QE0<$ufb!!I>w4{sd6Gy}zPM_$D(|Ln(Q%nCr!jHI2^KoG%;Ay@(C(wO+Rzx(DZ2fI!pj_grWR4^(%xA$bTAy!< zmllLc#80x4pffMgD4Afaj@6tp{3PVQI!K6{{FxkbuwzMB5CoS1f^%|Rt12qivNazY zxk^(w-`lmiZ@+;Lh{Y}Zuj1fu%8jbLyg0^8hd+#1m%ipQuTo*76d1C$u?aui7)u?n zmDbrNv^y2ntWE76uocavS4;lx^yJap>A#t|wrdf7?TwmzkqXDy_!0aQ`%k`yHy64! zuivaa()9Ua=7VjPW0~=VSJZ4`i%Fwrzj5iQ;rK3X=x&G*_=JALd`xZwiT3O7+W!3jWJ zJ45grO4C-e@FOlbbWmkW%^`-H&B&&y74_TKLJ*>4s^b$oeoSIwVrFfDnYi{tBU}qo z0S{3AI!@mCF*t9+-XU1GOdr#vjE3+`V^MJ7BeAqIR-T#lPM;>&*9c9rS zL z+#CtuZv?=EG*ow5QSCOyps!p-s+r>l-Qaq#2G7r>ciV5|M(k5FGwBd!O`XxqLH&b+ zYNudOaN^Rv13~KBHcg;-35_^e*H_6>6B! z>bvUnWLN<`K!mK%zF)Tdjx6RPA|Dyo_3xOZJ6EA;@rBy06iCviO5*?fY4zmukjaKn z2jFS^(!RiwrX`Pi0^wz$4rho3KcJ8{AH9WV+tT-}tyFA|{4M?(F{uKq@i9#P7XD)y zb$RT+HX6)*j|s}A`|QX879?h$e;wvgx>p>9h>)q4Q1l9X6TxE&;lvK%2zq$PEev>ibIines>A(OnPwwh%*PYkxd@^w*)Ah_)#Ycdfmy1 znOsgsulCO_H$Q0B_kVu2aHNH&pXjg?wfw=GHVey?S75Zj6tqBLMuG2-l^a{+5B%@g zAiIG~F*(qoUNpA7DrJf)|8|$`BJ}+813{rGp~402*;e$sB92>K=v+$G3#58%Ki$Qt z`Va4~)^6pFnXP;ymbmU;<6QNs_eqmLHK>&B2Tv$3rxG&>UrWcUc}_B;S*IRypPX literal 0 HcmV?d00001 diff --git a/calibre-plugin/images/icon.xcf b/calibre-plugin/images/icon.xcf new file mode 100644 index 0000000000000000000000000000000000000000..76d7c0c9aeb81d6cf0b6df17f8bc7e6c60a59e00 GIT binary patch literal 63927 zcmeFa2Y4J+mOolm9i*08om*;Use^LPIY-M;&IXKe#9(j+GXvN#3^44@5FCJE2WED5 zC+*C#gAKMZ#tFuFzzAW2aR3KdmgFcWrS7WtJGZ(OY?RzkMrz<#Xz$y7!)Q z&%Gy9^`gZOEQ?t)Z(+>B#Y+}&9LJB9FQ6P}qy!%SnQ{47ZQ(e&@MpoL#Fd0A*o#v+ zE6PUz8Wy41)Wr|nvuHu;q7{qhBZ)&k&n2u}^~jP1F)NoYSv)^6AYtL6c}tfpT@jPO zKy7N? zIjiLHTt~|uN%Vw!cu_nBM&qUaUP%%qE(YnGWR?U@A$hjW#Qnf;-~9dWejoUI;dh=t z?xuRrK2E7|iDgxly2}2tg4F0BPQiIpnt+IaK7X#Re(t>sbqlX7m_K*UT~nGWOT)CG zkrAO0J&|D+ZtWwl|K?wWf6@N>+Q0txcfWh>Co85E`dLD3A%g8%u%3Hp`G(hCef8)6 z^t$GC{ukH&>D5%p6dyypZg5h zjK}1=16Yy4{%m?|UKz*x^St0us5yc2VxoQZ9(=K2T{WV6-fPaAAeA9$eO^?Z9Hko;gFoG1UO|#W%!o*OOu(K&4WDykpY#MZAoF~dZ_JtP9uwu}k)t@Vlk zxyVpIo>@MwzDeD5b?o>_Q}0^1{OOG(PzG1SODk`pKr(e@PPR z=P1Vqibe}RT;c^jfBl6Q*KgRk@s*$d;(2OGr?=X8CNo!iW#$|HZ-}q|g8xO&&tHMi z-0;E=Q0$7y4AC1IX67FE$=w@}yTASUGfOAs(t}i@pFi*4>!;!xWT}0fr1qVYn=12z zC||8q^{F|Zs4vY+iS)-}=eLR5-&MTZzxzK1cZPreoOXt1XucyP1??SMCgF@C22yd~ z-V(}P?d$FB>+8k;{r!FY#(qy<54H|YyxrDxLf)EsZrz&Ro=#iUzi->B-|F7B=Mp{r z=|?F^b$U&pHb`sGUJud+ zY6CSIe^rn^Oy6hF1q7$BaqjKdnlZg_yIgt2C~3uv){~%N3+|SD z4_mRlSg}eKTavey@UDC}6`)0ZQ;FdcWB@G5*0tI%K+6xw9Y_LAK#-o-_shb`;xG7- zyrCZJuHj^3F%62d9-lZP7DulXDqhJ8W(dH35`Y>uN~L%y3o45hVr-qPN6g@c@-vtb}@I{@~58)eX8e~HItWdL3)GUsK0K|>w>fa z{?YRvUTs~?Kg_S``@sWEi#Y#4YoHL&uknwVyLy#nm1pJhrTo&qIAC=_&Juz`Ev+GJk*2H^Ptz0$O7>aRPu7`)LM?s>?)$;23!%MjRR3W5G@Cuo3 zR|^{!+>|ds+V?6{ew?6C2qxhQSJrrQ5d}$nf!OM5-7WRi#N0e-n#iubC0hm2`}aw{ z_0hz=A}<)x>8e{gO_^r?3RgTz4y1lyFn6`;mdRo*nk+`k6)t(q!kcDF1rGKIS^38N zD_Lc;mW-0?RxVh!)U@>4vK1?bkqJbgs&3`t)hmrF`Bm3fu2?ZdL|?9A(Te4!<@^eM zW$&$->gA6=W_;}Gqia@;R#kb&g87E|SLQ8Pviz3kVuo@GjfGcoDrVk)OOLU>^Gb>h z#hAZQ2xsOzfm&<=SIg>dW-3P2%2Dco*wv#X<4k}9{cRaxG}7LNZH&~CZS|4!(a&R@@T0$A96jUCypGMH! 
zItfFl*i?)3^XkA_BbWzv-8Pp*4% z^&}`sO#1bDr5{y^PplX_W^7e_&PbVQ+MNd5iOwg9sXcs!!SOMM-$Sqk^|02}B(fB37f)n8ve zeDu`CUN`FX^ouzw|L|+`uLs`vwdjmNHX2bHjnRzi=HyN3(EZ9Wd8qt^2ZkkK5QaO z8k7TE=uN|A13`o4J;k|fhssQ+7LYuoca1F&03+NnWz46m0{Ju(slbU-`W5Q zDC2&2?X2PKKzrwP_h<#&s-&HgRpP~7i7^$Co;@6ORf%s)eWt$QJU1-ctxkLYb~TmG zO-DX8d^+&S=UsiHJ#|1ZHLqE2SU#}iu@5fYS^#?}Pm*GQ3%`kveDkGWKa#&#I;PpH zjD?Iw1si7~0*%Hfw_}9ASNea(P@{68d}eapmV15tf8S^&2EH4yjsnMS8NJlgu>A%I zJy=HyQ^lJXMk_??ThH|;+`suUTmLtAcHra-;U=5fb~Q9S+L=){W5a< zq-UV7*ALr3zgv<#Bq+sR6uWGF^$gq$ov#lG+a7ViEx9kBeur8K{#7Tk&;2IYE2{fE zecx_=vX-)pkL~#4voGI&!k8`#`GA(IpS2wPXy4X zK`U-7e{j+4=IZQ7KQeFg{#qTc>(l!43T!YsojWK%$#$4RdFLtg?L^_UIpH4+(L8UW zO-TiTC$58g2LHjX^kZP2CndTkQ7HN$aUhhOMgrFhUk}_kgnI74Udr8w;dp`L9$(Z% zMU6=Bf9=KlveN0%o7^uvhHUOy3dg;)>6M=@uHzQrwr|7wjq8892u08HuWekvantHq z9GAlX30kX~X(gf0Bxa%>ue%(=+q$;S5vP{S0o8=byTde~4 z4I5sh^n5hCal_A8nWhH_|IwqAKJVcTn>MX~_+G+)_!M>Z{>i|%{RDoWP3vdF6(>CV zGDfxm(__zn66nSa^Eqzm;%8poxbde?tmL@qPra~l4X-?v0{80U zsPfVaKgLA%zKFSd|X2Fzw*+f^KeqR{L(8MpP5<8g-_zRO`D!w zfh4%lUVCIdcfFY7-W7Pd1OaeL7THd+KuW^I30c z{&d@3LLP{ALB4TRYV1V#-KA~LHG z>BT3BGoB_^S@&OJ6+DZvieG@tDr)L4+EZ{)QOR-w3yDe)h+i}?{7|kNbzq?A_h*(B zOb~1puqmPcnj8MGpriB5g5mLpd4cPJ4FcwKf1-J}O2(lX<1|24j&6THIK`DpKiJVV z8j_biJj4Bos69wS4fF#~@#i&ef6y95n5slIfkW#;AsHtf3%DPp6Ha%8^YmQ8AzXn9 zFv#2%?e&%AA=iOxU?W0LyiK!54#$T-UhWoVz}H)-MLc(v+Q1;Njs()+dI=?q8m~Dk zj~+8Y1!z)OKiMpMhRq>*pdm#OJR{FFjfta5co?J&5IreZQ?nBR#JYKJeE& z2>;PV;GXuSgx`M%xafHtxO@5zk3z8c^@5D9&*AL3;MpF#>1%?RiX4ujghZI$hjZLx zm_t$e>s0jaC??G<{aXl@Bc(})S7Kh_H4~-&m$C_kqUrvoi4jARjz1humLXp$-vBO03gog`|8-o3Wa3gJGnXudbB+<$|Bm*5IBj`wG7nD18B@il z2pUeoD`51uC<7E4r9bYVu_$<@LS?3-mf8=yCsY{dkK}jq!_LiX3;}@}A;1rYHG#h> z&|wXGQ=nEG1ieV7SLjqhf{xb<2Dn90qMJ(i`3GoqdZWoq5mQ0If>p2xh@#=Gd@vuP zwvlkTfO0TtYIR0)uq`Y+A~Gr}S{p4yiIEX}1Ru^v^hHKRN5{m(s$#LD^5hs#;ycSG%h! 
zE6RoP-ioT~n!5V>2BATyr%Ii0nfnFk%%Rb)l&pfXs=CIeapNaUm?%tCOyZjrle?QI zO`I@c{P=NW$BmmXaZ)qi%unJciW5jaqcV;UT4QjeGd>|cB{R36sHCi{yu3oFP*n0& zs_JW171d35PASjN&Cf633xs?=Ps~NlE|}WTRXjB&!4+w=hJ=PiVE)umifAE58QT*T zSF&hEqK%@4#9*reQA!H#3e`bRE#dZLy9p+=UQOcC*`YKBK9K81E`kE|)(B^c3tP29 zFe*e+Wv_GE?8onmrYS}I%1|*!ij%zPycxXI!x>7RUFibj;15Pw1uMrM7G_Vv!&o^m z71yULd+`Txpgwm)5oiszn9U|qfNUl5)_lrdFuETHi2c?1afP z?pin}L&sXE`llZyP+K=lWouU7H~+2~Q&e(dkAXeXZS93#UiLU{yJ}|x|Bs?nC8JB<+k&>F4h6RvcQZ;7E-S;oe zgAtYM8*0E;_Di)M}~%}TVh zBL`K-3Wmg!cvOtT8OJ0_YDRWJc|-H; z`yTYp+_2hrPpr(2j|?U~iJpy~YYi4#M2sUoDJ3mE15ziipro>X!p!;0mKVr`tNry) zygkpodqPE4T!e)UoEN^sya(#dArY}IFi%c?VNo&or>b_$#2It$d-$QkTdL0aWGAn4$(FCXhguq^HP>^1y zhJ4`qjP<`ArK(S;nZsfelMV1%@_a96Y#6O5i7uZ=N`cT(2wH4=Oel?BfwirMXh+*g z`tNO<=g-R{utmouhG=NxGnvhZ#iRYMC&U&S9u=X(;-U#0eFl#Eq?a?n5$#G$PRpSE zDHr<_Ca`Wy^H|sSP9Ui7-G$^0qG_+PnkCJ&bjw4d*tbLIcM?8$JUj})E~w?KI#uLen5~B8>b^aIXx?f_SNE&@~ZlAQ)e$; zy=qj>Wvu6|tyDqaVBLgA+aa+MlT%VL{=8y{t2s-S=F1CyXwl{R_#S#b|DC+xwTR7z zxFBgUC~U^gSm0alkX1LYIXdQ?KxHOuKro9nI0Tdmi->k4W)#*;x_d#kPtJKIwIR>9 z+F?E5Pf}fI@rbh0>GcL9&N^0Gq&+FSeC(__nes+6DC7-;Vn&0i*Re^mLlkQB0eoP$ zR%b9{?W7eoOrP!DqpE%T!;pLnKvns$Mm8kgkN3Y4pw*j0W0Ui1r`(w?KXS0%kXQ}$ z9{B_Wy<8yy6&#h2Q`J1nSK|%u_Qm`LQG|eh|8{=I3o?=yKLw4l`dII zyGDK{%uEz>eub8>Ioy?5K4E&QT(-)$OPb17ExvP7Lq##d1pS7WEgL`8Tek8S-W}Cc z@yN3IvznV~5$ntIR}?CLttr%zUfMJzg|(`y`1y}yT4L{g_`wBtO>g#=RQd%PL+xqB zjgykJ)K^{k%TM_B`A1gYKmV?oQ-+na#ikV2k580KmcI0;e5TMQPnkTfv9_v0E-9c* zLr8Q|UiFxGvV!*o75&4*E;@bmD77{l1#_F%ayK}Q6vb(wR~e)b&Lmkm3v{hu$;gFB z1};?6abZ#r7Y_80(c#8wG(k za|*=3QFz)7KeAAx5UB~+AB=*5*LUlJ5JyP%SY)vroLX!`$c135)nYOlbV0}*%!9~^ z{*W4YKw?X%qg#LtS!&aNQey|mT%xXctIPUT-w-FU6hq%M_wR$NUiHQgiPRYqYeyB zt8S_-$xgC|S@eN0yHhTrScHBU*Pt~C+&RG*S24D(Br7pC)T|5eJ{vZ7<4Hr1KlyV6 z*nc@brhGzOab|pUh*1lxIW?o=#IjPp^khj8A2bG z1>+k_Gvgx%n^ST}t18ONE350rHqV$nXIcsjyC`QXnp9JeVh=U>dZ*wn73AjRN1^HYq2RO%#Damk#n}akoEhy)S3=QE!u7pO!#HHqy zkC`@inw1RLm-S5(DsvK|2A>Q|b%@!>8^1GIL!(@&`Bf9{n4LzO2B)lQt}951wHX3o zdnXET7tHz~SVi<^TU1;|al_QR#)MIm%Iu~wCFzbZ@2HXFHfRH2S2=;xerRk`F50}y zNlhx8HRH>n5txG*^{7dGAl8x}j^@?~S4PR084FUWNg=p&QdLeuq*cf0f~51HkP3$i zoWilQ71U0dn@g^bOMGDd@@iOam9sJSLDG0IEw z5zJv?%rSXRHkl4C3*qJ9_V`H4ARBr4P2=OY>GP6^GZYE6W6LsKSU-b|2^NN;=H=kZ z33tqlWd-xcH5P-8L%i-?6b3Accr1!3vuZ-9psJ?1HlKKHX!T&78H{Y1rR7(TpLHh* z7cj7)X*^cskSxH8WNY_oXhba5ZpD~s_cR-*$$+@=jV0-{$mLFwFo=ncinhBFnJ~C( z2F!dY7hHa`&^Vfs2wP%Gx-484R2Z3XiJdgIJc|i6pAfp7pOc-Pn_moJHRaBG#@RvZ zONN|gm>;mZ<)<>wePuaceyyUqu4&SY*>^WP$=P^GNUCcp&rXO6rV*12B2fyf;P^>X zX52M*MunZU8IIRiPOK?FbC6W>Q&FsW5=3)z^ThG>>7npHK``sGn;IZ+S!1%CS8`%` z8DDm?q_`k6A(F&5ifQ7S##H#^H(P+Pk(f`IjJg15zVe>omsm5dDj)U}>)_U-f^|^# z5=7t>c=HvB(;!_@IJHLO^h#F@PP4H%)jDv>b>YODh}VUaIkU8uU-y4;TRh(ZwL3s> zjf~GKuXT}B*A+auOg3b?6-tmv5l%`7Q|`^-&a}ev7-GsBff)}kieffVr)(3271tJ% zlv5Z^zUN*|!jd~|%(Cj_&JI~tQ?p~uK?Z$j);-e_e8yR_jN@H5))O*-M#rZnx?ljv zET7g~2D@*8P{8L4WcH<=oFxg%%rx;SP>^A69@Eq`mJG{^NdshOW~EM(PRtD1F=R=~ zBeQZvWi>SV`i6#jzMij(^tQzeg`ttLPG+tmVO#urg|o6H(HtBa5k&@B7j*Na zWU?pnsX_|m5_&Bw&^m+35)3OeGvj0bjaA1YhDo;B!t-m5b>4Im&UkFfu z>v(V-Ph3YD6_=21TZok7=kV zNRN-9Bc=D$39E>OH=nTt+am0VITiH{HKn;}anYeTm-^#{BnSwVQ>=K{x{&CAV^kM;Jbun;qj^?u&w@$Pv_GN0T*YJtlU8yyiEY&Hz31$Z@vkL6{Z zB7C?`0YydDDWHv?b{LM{op z&TxSL*B^>_5OooAh7gdUwtOFoh(Vh7uV#f8@%0pasCka#7WRxxulWtfH6t3~E~L-B zgYwqGX%rw?xFjsAG#EcJu*@=USs84)@}|suXyioGLoO;~1-sy&ayUwm{Tgn1d|kpOyIfuH^Tj795)6q# zk}{c$^SsbOdo+3JEg|H0cf~^shdVJngU?WAYO@sC${cdMwfjy2C>I`uBXuKWxRl(bhWcjuqb;xECH~|RFX5kp%IQQoPwC&olF>tX7?nVh4}G`apW3nle?zHvm#>c z_81&?5D$ghn}{LolUm z)7M28_ik zm$2%*a0KDKjDLf}qFDV*czWrajYHA6=IOH+ELr7ydijP&0*@dG znQ&N5VN7wrTB4z|M^ndr+4^_^-d&;oN 
zv{WE{oNzGE`r?2|?&6Qbz70E6va=XJIVAJyB{S=6!76N){1JcT_^7iyJcI?BN^>v*Jk{v|m zGUgMGjJ2EnSg;wqtZYxDZ7(!DR7)c5=BEr**1$Cy5*}lZ2&HH-ii1f_#~LF;e%;u5 zJ9DQGn{nThdEpqXY?9lUwYcQ;Y*{ijj-5Dp2At_jA6oZ=Vit)poYo*0?azqbp=o1J zmR;c#0#sR3KW6;o86*;Z_(NaaLG~t6`|6S}nl@U91gvR@B*-+jG-ljA>e3({gFc4o zK0dUh!6aZaCQu4WDiC}zW6t8`-k=|>Nv}B26LZt4gW`SUbAZtjj6emp#baIO6qHtv zX`VH2$^AJXD|u6WVoXnqN#Ivg4GOrSsZLDI%qy)KJ7xBwd$Vt@N{g9X;Oqc^ z$0E=NwgWTvpD;Yy5tp2iS5`M+=Dhi`2YpENLWbPIHbh=HK@at#GZYR<*pE>cB0M>> zuoB@-bJArmaoI~=nQh2hG#eF(Cr?sExh9>jOk~)A2{EU%ZX%5Oa=p?Qe<*JihJyR( zi%s%@u8mcsxD?i3a`Y!=7FLg&F-vw|>q=hm?l6Xe2NpmFrL$oho%ZN~bl#(Qs<7y| zG}tSr&VW0F_SKS~dABvTG0htp-eff5BL^&>IjYH?!$bE| zhx1{4C~rF-5^No`4-L7p^_<{P*mz#*Von7*1)hed&mbz`}~s!rY9M zcze{yvXJT~d}PsZ5`kFZ_LRJ`%8C+L|6tx@a#R$4*~VvS`_ zxec8t%u045E#3qVNyq4~3CO`AM%{MfNgO^x-{MR8V`>CnC^y1WiC0Eza9;935cy@c^DP71mW2W+aXXWKeRQ_`qLHSz#{1 z=X3H3OA+1IJY#Y``Pa|;N7dApQB*~0GR|IEd5D7@KV?=E zoNK6|t!SvoC(APR36^E5q2M~EjHxWlPECx5B?3BKZgExP#A#FFpfPjj6$On|MVX1Q z6n)_{7Nf?b`m)@#L}zSFOe{M-;N&rRN`;lxNUB0qMY4lv;j?|C#`qcpVmM#^9 zI85%K5naLcj497eaYcs(BL?(JFb=yoEmSs6nvq8Cq4T^Zy$Lq?1Q^~2&FTv7Y-3qY zGV!_zp@c9C;H*$qKVfWaxhAxBRj^c}~f{EEg&^$CA#jfTYGy}J1sgX0+NRbz8~Jk@ZG|4+u@9XXAo#zi&a z{tL4kPP1FpG`W&OCN3n7t0_j1yEp!atq{!atrRQQ*LYXrAa9|@#5#DF60mloar95h zfY#8|++1X)8pOp=s4#|Kr`m&+mr`@o<` z>6|{kwk$s@Ed`EC=-$PZ_2VYbs1KrN_{Y>h^d{dVwL0&ZJf^y|fW#cz@5nMSJ_A-W z?z}0tmPGGOa&e>}t!`+VG;LPCjtSC;I+DCzDMuSHlZ)r(-7#f6$!dy_s%;$KJZ)NK z1bF0JP)cJZWD^ruv^O!SsKmFW(r}DVW$*8W;K**n(Ve{VK2Q8`NM3p0b+dP#JoI*` z2jr!9LRE-^_cH;W&m?$0)8O^Yg4c5`zx)3O&y!D)=b@fK&%#S$5Nnx5H4~cUaNwY# z=e+hxW68>;m=1h)71mi^d5J9H2=9V`DySSgA%TSiYYU!PDO)#9Yn$YGvwnR4C2DrwI z!BhpKZnPgS4 z{4-=bAPZA)cx-%HW|T>*(*>u_oZz$?WkWbUgcr`Rr|7XmITBOj9get!wBkwQi?g$` zv$OatF%x}}{SjW1QuN)_i#cRjB&&R;*a8V9&0pJ}y`BepFJ@#)_ zfZ1PQsDnu$0d`H8CSjB=DOHqG0XRL#x+A~c6oI!5WMdkf0erTQLqQ3$AA^){W)6wO zsRB+B#62V>3&~VK_OGz_8;AKr3=I~EMbby^l!4AF-`V0&?G zSpjzUC>v=hEY=fURC9jdJr<=*hx!h|=qjv2L$cW zvEihxu{dkm6Ro5a2HN8CDytAOk`foq@04f3N59gyaXCZl54Q2R}Iy0N1Db!3zoI!+S=->iqfLIY{a7= zN}S@zNtI-i!b2X{9UoGyON)zeILJszba_KlhV_gMg7SQK+0bI1hHmXQvvJ{dLuM{E=nduZzjt@M?I4S);^Z3s{#gMytI z6c8y-b^^JFm_Ha4WXr_am}pog!pSm$U`n(z!ZIP7H@a-FZXo={fM8`9NNAL@4MESz z=8+3ba~@_Mfpozav0+#d;H~x5+ zfxws{qmR!FBpZbcJfsC=6Jk#AQ4UF_u_|m9&mEp3e;cI z&2f)laBF{rUv+tv$ofP2;#%|AcENu7hX?pL}@@-|IkWWe+FD*wywt z&fj~9h3r;b;7~jAxZ414k?#TXBqKSK^Wb&607p9Kj_q_!)`=V z9_dX6Q8L$Qa3&++DK9+gFgTK_hC#RF z*}K!UlVvy9=@9#3~N+w|3g?_!i%d{(zfO!{)`2H@-u6Tf*)}3(#f_JhhJdf$S zC|}~Gi)x@^_XYKZ+k*3I)DXMB51dnCP^F7y8=KW$MBl;u1T%ECkMVS~_VOFLat2>|qd}&NZEDB>V*RxrVciZ7dwm{ zG>ttBqGV@>p@XLJiWd&x^P6lMFPzn%rD?puAWC+e)t$vOUSaoQTf3nh)7aK-Y)9AH z+D+}4#qV=XoZJ7- zJ(5PkWF|d-K?;=cflSTpefXMf66a2N>7rzSis+g0A%5Elz5c0Wq1$+o-Fl{U-Z9>x@{^aLL*T|`_-rY-ahVj1@N?c#f=>(D?GEf35Iurjba45cpTo|Hc0@jE9P~3( z4tp#DJ_8-F%{bgHI}fvX5d7C}KGp%$6K{7Y9UfN#tj&5yAiL)Q(VU;%~5%2_5dp6vODc!EJ|uPG0vV%Al8|U zcLYScGlzf+0N*)IcIE=)2g86%k=5Gt; zZ}XPTZ_)g1iRP4BJe#-B^licPNpBPCd3W;`B%=jP-xf^YyKisd0mUtw{Wg2HyoKpA zY!llAS^Tmo+~kiS)p-fzXsZN=!`!HC~p%k6#_v$tKbt$6Ul{WitU zV)@zuxTAO{yWT6_p?D9qb`-yd`}fLsD0U-d*Wk4qdAm@PU6{M>t2B4rSFT*8x$C|n zToJF*+;wB_F4Ej}W9}|?U%o_h*A3q5c3;9LSTT9sn7kWTyJ_;eFL$fD#Va&--I%-X zE8RSx*nQdWvggu8V!ex(b(h6%n!XzsE-No%YNX47m-{c^cjYc#Hc%Gk?<&e;{w}+_ zxf_=QsL-X$PUN8^Chp?pYyy`qW?#hoUAve=-~s^VPP&*2kWc6Zpz8Agn8J&g!V8$H z^SI6_x{3!cOhT8Uvv}~reW#+Mc<{n~hvIDU;D!6MiuU5c3-|4cwqkZ+5|=$slekQh zo~KD%hEJb~&tnpSDG7_^L_MYww`#hL<_0o*evyF*MT$SAL~-I9^liD_Qz+IBsR5Kq zzi3wWFEqcF8WzaSA8R?kSk6@gYNHEt$~>8d!FN~f7dJdQTl$#DS7_m zC^~~_&y=zW{0hJyFBtYQhzIAu?e0;xGYI*ai(6F8mDDxjTxo4!K9uJQ&J*qAN+9LL 
zILwm(VFwAvzJpFOr-;LH$0bH$@|5->PU%Eu0Rabq%2|j@jKzGa?FG1ynMc57*Fr3D z@$A4xhc>HS%*L(NAx^z5u*-Sh2OQ!Qx%z0p4V4EUmD4zMFrA3WoE@mq?m=PR)`l}1zJd=R3tLg;dxua^E6JY!(EDBHk47%MhT{>D2F@c zk{6kd3K3}A>J;yRi3$&K(*HJmjmF^&=bX+k&gBf{;+!@_F@SP!Z6?ZX5#J}uZ2{#r z^=y5PNVgRN7^K?@(rpztgLD9I5e2uRb{^-qHRx?| zE0Jz1jy|}oTgCTi(%%vH-xjBDvYnyGSdoN}i zblYaz;rW(PXlKX{_s+gS^b478J3`*`Tw%o85&WL}V@9`_9n=m;^`3QydoPk?l-dE+ zcDUb1D3Xzw(SDxc7b%ezzDm-ccO#4B6fPo2k8s) zb&EHMeBB`5W&HFd$VUH}oR{;y51bP(Q}xk-yX)KqqTe}Pm%H75U9IVCX}A77l9CNEzBXqFT7jo zgy+f$50(?2EI^Ml(P;y8OMgjqh9FUpJivmH1ni?!ht=WqVit$Xi01Fju3VP|D#n2H*@^_0(FIqumhzlPFl0`gA2uuxOOQ;`C2PffcIbz2-)c*VB6^+5V{j&0_xc-C%L6dAM6d;%g6*Q zcrPQ<#eMHt-(zIDxNnbT4)?m#4;itJ17XA>gc0i~5KQw|BfvNEofmxhf%*d!`SK(6N4J2(a^m6r>ix(Y4fqfE z2t`mPR%jcRQX7_18)yXRX*=C!1BCJHOj`&Nu?T<#BMFP3_jK@SA`uYlX`&Di%V{DI z5c6rG4-nI7A`cMbX-1vi)2H;O7)hxG$YNt_KRDh*#T> z2OTG(czQdJ2OeiMabG-n-2XVEiFozYG4(Mme;Jo`PMUC#Sna8!L1 zt+bB<{5`kQ1PVt8eDeS{xak2x;j#BbTn7Y!s`^GxzK&!kvah2u6ez%8m_t03*I^dH zECDjJ4Imk2=$4*O9TbURqB{N{B3y=`Hj&Pe;(&*dkKw8RA%)ka!2@Rc&-2^mmiBg*bU z0}2^Zpv#WPFTW`lwOy#`y+sQlolkVT^ndx*o`DBHYY#QjqPQ_kWLg;!A*Pi0N-ivG^dteJ0yl}r8c9FrW ze3xP$tRmhE!`z3OgBR}iC_Y4O?}Z_MSpET?_YtoBssT^$f&F>=G2(Az&<%irf4V<+ zKMnjWgAnwee4O(!8~9-$e#q%&5W{QzIQwI0)>69{_A!WzqX(1+JfD4XAn5>&`_cja zK>uHlUY5Q*;5s1w^?-1I>=5{VK%3%p@!*B-o>rVH9=tHvQ;L(tgID=U#fjp<3#Cse zju#JJxIeBqRy=sE71kZYFpsJ{p8jJ;^NwPe?ML#C09*&aFuyyJdxVC0kwN^(&Cw${ zNI>V$0HI-CXAlYB9?3pJ!@R(tTN?Q0NEQ;P=wany&+#LNlMd53uN@W+51hFsojmM3 z?D_f|+^r2-C#j4cN1v1@;RFA_mq85!9{!kYdRW5%*(Y*frx5=rgIWeW!nd*!;}L*( zX9$=F6K(|_>cxYB+q`%P z7wg4ve)VFZT(lR%@!nzawgz>K^_(RZ(d>-aE1s2kWM|lWp1p*FNA`y9@$Bsc z4j$QS^Wu0(;Jpwp4leOxAcPmgjPJ4R_Uxg0py?j77suoy-F+3^2AAyi9AR9t+w+%g z#2mX}64>oSK41uJ1A9na*@f&~A^Tu@L34!cVu)=YV)6Dtu?ZuvhuCEwtO{V4eW82M zmKWK_kdQs@55D;rX!M@`mUjHc70`%1?gPiK$?USn{b`3=W|uudAG!~GdrfASJ%Jy( z{|Y6Vu?ud*uVr@mQ2a_}mk-@!83DU|D7F&2j07LZi64y)zN8BK#oGeae(};k(16%a z?6O~MmDy##*d;T`e$U4qA9jkEL-DZ(rWP>C$L?e36d|OnTa{oT1XpwY+s+ppkcz0rW`D1H|+$RoOmH;#xWW!^XxQB6y!Emx) z;2zQf&Cdk&0k7jDu1IHQUjVOJBNc^U-FS%s&m|3*AXXA*%ise5yCkL%Gf3hK40tYT zKxNoI?vw#Z3G|}3=$2HNY_7hVjbs>cK3=FrTQKW%znlfL4h%g9=4oN*3BeEIZA?dK zrd4wH+ymziS)u+r3{KmKjYb%_V1sBh1{p;y@d1u%pfJGQ01C--PQxI6#|*!jCwX9m zBPjkJD<7Z$04J7%ATGdcfZ-+|hO7EuXNgnB!5`#wDO^Pa1Q&Qm5+8H%E^5vFhRY>4 z|AtF$_YJfpNTq1B40Swmf|t#U(M zt#U(Mw`u6}FO*-Pp)U?84xynh4he@?LuU^0hgd`B4jqykI(JBJ=-h1@ig;G}EE-yd zb&iI}<5j>7L_GWKdysG=(~Bvamte|-e+(8)&oYTCMH(BOhU=3&71ghtLkW>i@_iK- zH1Qh4S}abh9i>2V6ER_k2i8?I89 zM|#6m;=(Mw;VO24iQaG(xkMTxutIq7SyF*Zkdb^BqW>lFmr%y=wi6+_ct@BaIWDz~ zWYbsUC@af_SEC8Zbm5gaLNZ)9( zqLc69CP_S;p3fj7bD*V%Xd*O@y94>lkXzryD)a=40E!)2>H$? 
z-yuF4tvG@mEDwRqbZ2phPoWjE1>w#l-!U`L8x-R}dCUa#1_eXV9W(#@B32vA?wIB0 z_1FOUE`?foJ=R~o(}Mx=qr@$rouMC;6~8>-Jka;qz?qf<2atj28}jyk!4mQ7SQoyO z+4T?-#Fim;rCQ>_A$FyEH{#;ZBG^^VC5HU+pz|OOKO?bEoS%rFv9#~SzM$MssT{*T zza4h?Gv{X_%N@eLZgIBAX}86`qV~R`{(VX8`l;ATc+01XPrrPftvRiXUAtPd zTN%4{wPv+4cI|4(Y+>x$)soS|*tM%Atp(e@7f)&Nw0YUJt0k$$-R5K0&Xxoe!*nxt z?QDsY?-;vw(%>nNv1@0GUA|-N+Sw8#-!XRWJQ#IQq~HH#?Am!SLcU|{+WBeNr!0@L zYv-pn`Hrz`=fPmPBxBdkgO-EhDYU}awez4!zGLj#`HA5ZmdDt&^AnwX$Jn*=pjIx) z*tO&1fR9;8#;zS7`^$G;c0G3d`~aRHk|y1E%zggY;lqqw`-xpoutc!ym6I~Neuo6{ z_z=5(OZP{I*p=?xM}6!n=Mp=Nihbq$N<6}HzZW}BxreFT-;N#o()p!a_xrJUwWw8Q*K@6ft&CmIwG^~4 zc0JdU-@@3nvn8*Ev1?~bZVO}A&X$}O#;%<$*)5D+J6p0^7`t{J%sj~0wew)cLB^t; z2h$FUCuDZ*JeYFObHdB6od=T+x=;Anwc}s{ih+QPT{{lO$#;xhJ4lvL9%I*z19th2 zv1|K*82OH|Yy1AF{UZHjI%C)N{SopVW7qbN!aiboj9uG5vdMRhUEB5t%Ox4Rw!z1M z7&kAww(T>?cZ^-z_8Rtz$B@U^wQa9XzGLj#woki{w+ zC>~VyCgSzxX&+MP4V4k`!`uI#!N91`;`h+eYtOTaXJN;caEvOIJP6<^kud0GQZXHV zmPzzuegK7Z^jRj+PyGSVul@nh4*&wt@5KSo*Y5zbX-b!2nga=B;*A&p`r;u#8Yw3X zOCd~8X-R~!=q0AQm?@2^G*-gUL2`QnvlA9d81xBplLCt(ERwJY!lDTaBMc!?E**lGN+S~6fQ_cQaP=Rq;Y{VlFkLlNCqrj>~N9E`O8Qa z=O-iC_|`nj%Hd#~qJv8=eixG=d7MH<@;O093OHUy3gJVg;}8zb1n9UwID)mx$2n$x zz{p^H7)&1;*{9f;F|t;StQ8|`Wor>5YZcF+F^o(`Fftjz$YcZ~lM#$eMliA#wnt%P zGJ=uGSs0m&U}Q3ak;w=~CLQM1<2q631s4Xr2tv@9T$LXct`qL;D*(FE@rhbK*09z(K2ged zthJ6$1o@7&*73ic``uoqhgNEh0C?J~z*z)#Y$zjgQ@*FWKTfBq%M?Y+iv zhx?Gmt>xX^fM)s=zXMP9)~tN(-MxSP9ISgSW6}D}@9%H@^7zps;PUIyGoSv$w!I&J ze)Q-!if>Z5Yo@Ue{c7|32R{4m^tXrAhZDIg)k`-1>79Ko$4<9*oIUfk-`6hgM$YV~ zfBWZM2amL!70wFn;?b}CzKZ2;L`_`ti$A^l@z-tb{_Wz)Fa5uaX783x~6|BxiK;`XLxH5=J#^L&a0iMA zA)Az=&3t3a*?PwF7kMmg_>TUa?AvEwJOD?K8gw7u(Oms~V6CNa?$kxvQ=t*>lo$95 z**8xg-@DDZ&AY{>yrwPkU%Ax9!O~pUM}6`vE5~K4zIyWL9*iRRpRr-<0c#)MrLK#t zEex0F>l@E^@bs*|^%?&&!A*#ugzC-RMX+&-%>qcps6B12jjLDcP2NrH26^ZH4c-k7 z<=ynftCoA9Ws^pWHKxq6a`KM6!M^eF=G{k6o_}-iI`2A%^47rG$H&Xr-pYKEmzsge zd>48krCKeihEZCP~;7Ls?Cb!u4*E9C>;(AOnlrqo=f|vd{0`yfmUVf(`R1Z?(*v zmyhS>vC+~pGP81W=zM}bkv+b1{o)8dA{Zvl^C++R#`Y^bktVa}PFJ4{EjgdOb@jq9 zZCG%gIs}!_r&xMXr8gSUqhY+Pe1)X|u1y1>CUV~{Z@7@+opkZowa)DvEv#Vj*p0jk$=`v7e~ z&`<9JA)zaC0xb;^%~?i|PQb;9P8Ia;8wWn^n>3%`YVPJ{AJ-NY6) zma;UTHOq{tF$psE*q-B`V>fHSb21XT!&9(;N6l8&IJ2yo)68uG5nX@o67&S-m{@4S z>or|qw!cZx#B9`roIW${psfXSP3KNxL4ETqmo@Sl*$tYMxBRJ=uullIWHOHqDQe8D z%+5Bj8@Ojd#4s>l;%H}OG27UJ!__gGHGf9NS=L$J84%%Grgls0t>IzM(`2Ix%_7e* z&IszYAlJ_{D^Oy=+?i(67;5^EO3=2RQ_rc>f`>7Ao^m%W(-{^F9;$CzRUNaAR|_Jl z&MXJe0yfgJ;IcI*8=Fr*SWB-J)M&$_m|=~SFf%dNM)l2^yR3#*qgMUNa-`NH%dsrJ zxVwMk{J7=FZ0#F7KRDdqtF#`;?q5KA>qq?ze;L@E3@Co4$6hobi&S{i?gY77;J=b^X_JfBH z@CVTAz23p>m_qrtx^rwsS$)ri+YcYyXWb|7cDFOz!GJ6ZNiD2yAG!7D!F|Sk+C9Zc z8>0=q5mhd-=H%3Za)#2v+j zbF6b%vw-r;pUg<$-8=j{(%V;ty3RGTn(-zTmZbj<`;PS1<-v~TCQcKzQH3EXzD>U^ z`|RRCMTpj>1JR&$mU;#de4;Tq4d0I7 z480yq#RO9le};L6Rxdx((os*VXVj^(Cc*XedZO-QeM|3!>o?o$h&ooSN{dnoBCJ+g z-`aQKIz+-KhHLR!P7M^DXj2Z>VzsifZG9IZ@Q}QAwg#)=R;#or;`V_{*V)$wT;}HP^QwVxC~vPT^SwjZmupXE9IABB`WPo`DN^~Y^bB5vJ@}Xgd!@e zN@81mSqZyDP^{9X_!o1E1w~k)N}FPvQ^YC~6ygOcZOWcPMxmg9ny=EPtS_Jy2=eK9 zDs75OKAtZtsAwq8Q_sV51(bhGv?(Vl8@tYrwCCmJ@^h&T9Fg{EU73J`A=-Dc* z%2p_#IFrrKW@M?dDt70w^JI5+7B`EPsnV*{4Gk4%axys?U^XUNm7{}A8EP2}iAtN| z*Ox8fOIYcuY|6ZnbbdPfv`U*2cba>edkRDoZOW=s>{I-csw|4tN!Cfh36&NlhOF|aLKQXJYxU{OV zy{(nrs;Ee!CK3C_EzF7RywaMcj`mhoE7^Q-AHI*4h-}Q-gXx(?)lHr4t#~WDMUkC| zC(`$VjY-L?YU=7}Z4tDPb%}dvdkOH1j>1^wH1IerjB|>#J+wX4gg=>?m~)(SjApVa zFM*ms-~FG%72jq-v$U~1^YEVC#BSy;6=tTmnF01kdbUJzaQ80yF4j&Jh9Cvzt!UX`WU(u6n3>hez{#dG3$J5<@36;*l1_U+*B;BQwsWLdvsJAb<%PO(kp zkR>RN7bn{mmz27Vy$z35WoA}@nm1L?(e^?1D^ zKZX~hh*mvr35mgCWbub2rC`3u+MUsyXk4t)#snP7EU#zQV|9vSVvd;Ds?y3VEwAI$ 
zk)_dFSzBpaRNI)Nb@V!Ftzz#M<`%|gl|z@4si3B&ny8i)pGuD1NZZKWz@vOWarly4U0s!bEK$6Hxq%m@(#k|0-n}`B z9VJ+=(#m+Q=d2g3!y;8$8JqBRtaTcZDs7A?k{cj z_iqfx!Ub#bFqJlDO>A;{N!f|DYs2_q)KHZ+CZP;3Ba5R#`Jwa>l{O{>46h zORQ97W`w?kuV5v01;{20U5sI4TEPmsk4ih^?!)(CE>~$~te10_v%OV~T}-_>-kfEs zEX>rU%b3e}OI6wzr={GbEH71KmpNW+FOFvlmJItXP{H#mB^Ycq9dOTn!09Z=j%qD# z20!jEI1RFp>zjv_Ja`RJW#*J$Fe;Oyp=aWA^7=o1Rlr==9LlfCE2p_64t(=3Id~pT zDZfG$e4Qx^qZ^*QMJoNOWMgw-{z-~Y z^1C()^GrA_Cv-*Cf1KcmPK*;Y(f)CQ z>a$Hr$Y|a{?_f%c35(wsiEaV~aYVC3uIaE6)&R zD+w7-6eS(A0G5pK@cSAl4n-~$^^6mSq9aN|#_|pQ0Mlh0iVS!5`y41PMLti|JWl9| z_9_V(-6!b%m^sLhKDU8_HT|!4#J<=z|CuU$j9s zHV09J$Et6E5>Vup@R*c@q3EcR0DZzfz*L<;sm~Lj)Q~>s#tA)9qLKi8g8wx(8)P!f zQFIq5KZ@KuQV7H959%B8`)}O7beurnBx}vl*2{MuJbwPo^E+E9^C%xY5^6`T-+%b* z+vn8hWD5rK86SjO$_6gqee~rw;DIBbOo!>a-}J+>IxgP0`}sG|zv6zSSO=`%2d~_D z^!4+vzIw)grl_Jq8v2E8NVsn!$7mt;&~JP|;k|1qFwUkbj!KPN}f zj*ZkW1fLU+$%iOJ5dU23G4=?IFKl7*SnvpcsAxnb9tj>&ACR{o7LtNL6g+@QEI%Nz z2U_=udl1(MJD=Pa+@swkFEC-&k+>(g%ebSk2NJnjS+^B?fYiQCyG6EP5RXXQ7TjWd zMqW1t8oMR z{~G-&dF{$o{Ho$0aQIglR}@#TP_K~f1Wb5nT){76S7?{Xi}T=2aRr=`REWdAM8Bli z3=IAy+C}p6B{Ydq4yU|}^b3m17pNDc7cYzqpcMg}3ycvGPN*Y_izC>GVioWNBlxg< zALQfaJ)~AU;SA zH~^as9z$|uc#weKA@n;C4_qF23dzCY0Rn!{58wlMKUs~zv@Umm=qHDV!0AU0oQGec zkL)%9KD(dVM-Ghi5q+}3fxf<8S}z2)LN38RIxMCi=*3};{ZKEpm*{~&QmCaD>yh`3 z^!DIAr1Xm%$CJF%Sa>cC(FmqFOGh1M?XZEv#x z5vv1lCwu$b(QkJ<(N1fVws*I+tp^dKo!UkYcD7+{@{ayi_>a*l?dWQ4%>fy$jn)cP zLGPRF>3}{v(K6c6+0xQz0D-uyR;-2W?`eVG*1i^63+)`)*>(;)r&s|(MhkvU-a6C@ z=VW*LIRf>)yIPyEX81_-DD-ngGuhn-r{uZbW}=zdM0PbdVNGN;8)nw2&9o+RunnDe zwL+(w)=2g=fNzFuumLHvN#58xh)$b(8|jU-2C}Ie95ad-kkK3Q2C}EK0c#*TTN^fUVty3IEwbtWxAJ3dWTUXcC3_MmX z*;5Bj8LWmZQ-?5SqK>E~oBP2Rqv)xn)d?|($ zYNeoptgI-fmqR2qO!!s^%CR!Cvb>Ba!^-7lMLH-rq`Zt@hL@5RWu??oMR_S!s>N zR*IL9r4=RA5=B`FUZU6zJdF~vytJ4oCQD0-@nSNI2{46~s1+;9il{}3k|LsrESwKK zr{<9brFn2(Sdd4{!*XHMg-H}15M*&dE}l!~73ETMi5$f)glD-}jy$&{HwVul z3v+Xb9BMXsnvd`-hsY+23bOHRG8g?avJ^H5&$6jm@|=?FEIdnAn3I*2$;^b81$dT4 z%Ovv)GVx4VZhmHF1|vh3opG2z=#xp$kY^TWW#Ac#!VGE#RU*yEm82_IcV*xbGB;m> zesd&*gqALq8Hs2>{Hk&+3CDfr@(+y4P8`RMlLuJ{&rZ;elZ6s=o}CWA z>|YR7$n*j@mz~HxcI+tosO)&o(W6JOBd{wEM0*`$93^uzk77sV znWv5(;T(~l%s2vWBQgO&8uQ4rfb}zo!)+Nli^ZnR>U~D zPWyycC))>yJIb<79LrgY@XGtZnf`&E>gC%mfY=lke;&N`l&dWe4rR)YXuWx-t{FBx`#Ok^XxIw&|mO;OB3 zcy&OK0*)+SgjXra{A9*{ay!B+{(e>xxevB4q41J8`xHxn;Oyfi!Z$pF!b%kEC3o`> zQ0?XJQCI-M*~3qO?#gruJ3+9UjG+T8F?S1g;X4&mfu!#e?4-uS3zYwJ5FH*q+)v%-~cRxwdA^pwY0TZ7z7>xU;#l!MTFsDYoo$K@lbMISSS%n4IzV=2(Utl5OQ5)2p&R)MZhm(jba)CtPtuNd1zF~8f=X; zWL?ObHNo^?`I_Lx2(#AEg30iRU_4kBwl+98h!G?U2@)wWE0`W64~|+Bgu#E2L93ao zr9ok<1N8uWh#=-_GAwd64!O zB#OWQRsb2iE&vaZg#`r!_%r>nAk3fi<08BYp!<^%A^w=ZEHK>PAG{G{@ak3AD)$_4Ewvw|_zQQ+T1+fB*E(lhVp+S(xl??J< z!CHa)kbXfvgb(Sigr^lBeb@W?_^b@|VfhftN&kT5@aqkk+$l^SG9Y|8www&|Th3Tc z^CkmD-c)Z;U57$nj(LxUu7+*pL%r$VjAdl-ie=Pg(3gPDmN&laqxbq1%a#TB0gt&< zzASXvQtDFDlL62}EF+eV`mKdjvtdhFOQ~L@cd!@arkxBT`qGbHApu@qtNgqeUUW}# zWq>Emlk}JmBAVClo?cPjo?c;Io^(%!2f516gX)2~lgMgKK|ta|78RnIenxH#@HN<28tL*`7&tcGgY!Y4W9YPR_~rZe z_cu-xKms@9e_-*oCRs+eeEohDoE@X`Zx2l6Q2}m{V}z@zKR2}O@tY3{@Onsp8F1HP z1BUz?RcyTCBSU79tME%(a0%r=*7=6SiUE{UttEdOryx3zL&yA57JjH zo|TA^hX<-dp1E~*Hr=qHj5c#NPE}u3| zzHjr=W&vq1CL4;Kgt|)xW76A2#!wep+a`V4Y_Gu>C*|F1^f*9H8k1f5gPpm`$F9F{DPNuO5E0?n1=Z!L>Jb6BMcnr}dw!y0&a@R`P- zxsrU{FdsCB^{ivEUSCigQPLaxk>Watd|ovh6jzd?txll04y-GbUfhHf*GA;yToX`S zN&eKZ02J4T1(33Se^4Az(p!g+;#!D&Sz`u@D@n5585D=*`=EF}Ulz;+#g*i5 zjY3dd6BbuWhk}se8i>4?h7?zluWQUeaSd3UDeH0v#StaF5r-64N96M&Q&3z z#noXwr}V-aq_`TW_B&^?1?=|@ks#R2k2Jonk?H)|)lBFl!z2`4*UZ5IMQ2R6pmVe! 
z`e90b6}^gIsY7|EF~fS1EhHc_p2uaW>rXSA9#x4t(BKRJ_afKOVlm1-W-@yc9UOKu z=i0*5u|_#u#?Uu&PzFs*pA9~^Ni*h5f&MnXOa~%2G>xt8tu4%EPMgbMsq0OhHR%ZS zyalC-5_r___(m2G)iK9(x|urEIK_CnH+ZTAC0Ma8U3}m3Bu_2YKX)t}ZDNJE$ zfaEPz)7a8_0c1%tnoY3OAhC!}0rbcPMS7^hc?&^eY&4s};A>BwVX(OfgUMm6Ko4Sy z^k;!=w((RGEh;z)r%qp3Sipylw;ttpn$`@V5E5cd#ioA5S!%k5#s>MCAovjz7|sO& zBzc=jFaRW`X`Rf^&*MV}T%Yoeqi;GcGv?7a8u~^CJ}?hV12PD%CCr!l|;4C%L%vQ_BvJ6mdGnE2^>2w-^%M?v<7C#Hm1PMcH z8j=`IGE#%uv<(cbGr5`643H3NlV>X>lk}iA4Lw8M40Z-xG6~+5p0QG(Z-mh}kS0ZN zT%v~fOs%OXzujcLDJ%j|PhWH8IG?IE$rPHHtZ$%>Gq{isd&W2)*5CyqSx+01 z5nSylAk)^=;zFVXNHjh}!jZ@eZs!4-y@%kaDM+TFP9YeOHJpcJJ-s;~dk0_JWCLAo z4K)oaxUsov++7luM3#TAs1`}#8-{EEkbqE~iP0dfEq4);cAuAYBPqrd=;%Q<0W|@n z89=FJG$l-lH1B@77devhCtsdL1;C2(Ab}&KEr9eY zkSv7-!@duhj|VbKF5ka5043hRouf>Vkkt`uLdpaWUOr~nRWc9fYYA(Ie6(=f8Un6uVnMeGqtL1kkH zDu5AmxE@{}lRtU(Og*(}9%1`aPvw=?we()P0Y>^I&d}0gvJ7ThOw*iX8xXtqz_HB2 z>V__G2tWA;$T8;knkE*edYbc=u8Y}~oOU{=w7#`x7@~FGy?cwUpJF~$W4dF&dhzzX z2bGt03|@Ww?W=e19@iu*Z^1<0D=1>qwq5(vPUn`^oa?@N_t|%^UiO@T@>@{>b+r(b z>H6UDto({IT^DbE{_NY!1xdRO?-9+_*MtJ!;|pVUCLKPRRb1OSbp77vS2L4#rX7y7 zo2&(iVkocOlGD=jPzczK>lItgDA-2N8MvcQKb>soXARK@6}ad0T%CNz=#bb6YzgJgb?r(RO0`x1ZDzzw<`P>?WXVM z?gGgK^q;$nx{JS4CHU*@Bw!4v3jQ^>#>Yc60JdEf{4d_Ff%p@^-*7t)F#`IK%;vA) zuO1h-jlT_xRR#ZkvHVy(Miu;<#c*S&(W>BIGnySu7psE*O=2~~p8)@h#`zQAf6_RA z0{ruVj{y5G@XzxX3lM(-{Lc~d#fsf3;a?qza#X=TXN#CCCU@4Wgnwrw%Sl%W|C1n= zLB6wLhbs7IBUw&voGSR|1c_N<*^YC&52=KIb0p0!7AsB11o#(-nPTaVmVM${!1wom zg?|>3W>t%Kw5dXWmWP-w9*t`|Bu-PxC!qgiB+oo67Uy@ULVt#sCjJ=LeoP$Og5(p> zpS1?u4IeXFqQ%F$>Q$n@P)vw_kL!?##r0h|s?eX03S@M}i4%Kzj;lg{h8P!ri0vqd zNviD{XoM0k{)+yLWnyu3Zg1mYcTSj{qY^p)0{%0ORQI2++u>rjGWl=7zw78q8<$Og z5B`RB0ZIRF!2dz%j=u;0@7hwLVs=OT4fu~pwrxs`T{=UJqZ0h%Hzg(pFVN;QRDplE zqk({<3jD#2|DS{ZR8OWS&qF2nyLiw&`0lE}U+7MC7r23QOw}M@h8ylCaK&9zSpa=k z%vIn*IIFY)4$gdM+7eYZVCE9;5{8p18(`qXabh~EvH{wQ9hr{o#j0$;yv3Zwj76$! zz?4PoMN9{kHbBFH>A<#!VZ($WfaXGbnmyM}l?9k#N44YGs*#I+ZzBO*8$_D6LajmG9s%!wyl5I&Bs;~jP`9d|spI`%w#`zO$fZ8~Jf(>8; zUzH7*B@`h31RJ0$~=3+s?rANAZbXHFa${_*Z`)GDU{kJxC*zavH^4? 
zT_YCSC91Lk%xOZpaMWh+QsHu?e1Z+&BYE(4OJPKkDjPr*(u5yv5`Bf%dyssB4WOF| zslt!Jd#r@M$#E)efS!;L{=P6NNGRNv5~|7uFi?Tj$u>gg0|$as*#N2#7k;qb7jC_D z_wj-(RW^VIBW%maLr1F1lRO|(f{Fov=9-ksn)EeuW-Q*P+6D;Ax7kg#2>*KaR9Js)s`44t!iuD;BCk#c$l!T1!8}<>F;A<*leVzfOMUh*G+Q$h!QIe8Cu|B_y znScz%`aA+k8|hQ4BxKr1A7fyFK7sGXDa=lE3n(3=Poa{K2}GO7u^GTZ`uqTtF4E`X zIAJ6@sU$$3wNhCAuM7IP_`Lv1k0PHh>K-TbAxH~xWb_rmzd`UAP`IP$DNy>b*P^Il zoB(~41XAZ`*g!xZw6+u72g(4cQ=%ls)iDAVsN?^3%m{W#lv{~zj1yBu=}H3B375mp z1CyX3SHB;CnvB#L8Yd=+4k!tv4vKD@tW@U(P*adPZR3QtC|*g(sLNM>fV~T*DAjoa zlp*AegO{og+m(Iy8K4o;Ne;Ufe3ZR@)OvCU0w@{b>HjVGpnsJqe);-*Zqm9{qGgVN zJ@SR3KG-Gr=bs;6sY;6u60LAqXa>nQ3@Ngyq7(%zEq+ISce5pP&-&GY-cHu$C?&Q4 z+^fH#F+82AxjN)7?2`B6FJBDTp4zz~BE-+#0oEWIgS$sp2fU7-gTn!mz(W33O)VW= z{mIKW#6KB>jqmUE6d&0o-Wn0;4GZe5E#^WdYS@&(7&dacg%Kz@Fl3E|9PyCP+IZ%y z*>mioc%iY!0#o2ku5+}#SkYN(m7~cRl5jbpgm6>kmSO_6u zu9J(q=d$I#q1%&_cExUqideJSUxf0CF83bKbm`{myaa-+Kf@W|eA3pRGSzt291F{Z z_Kwc(Udvam3Jl$_b#qjBXz=R50DnJpCAttYWV*S)bv(*TxJ7V2X(L@tX3d$0RJC91 zxWw7T)y)lNE^s%@^;al5uA{x3Ex3(e;Z$f$O+y04~=+bp@9VZG_MXem|Y^|*9sev^>oC;ZYO+`gH ztsJ5{v?z+o3TlOPsJ`M%YXwokDVJCER+LlA@iN6RkkZOW>xRn9yQ{&C%PfNk8SvRs zOUWVwa9QI}NZQ<4R@+fVFJqOGO)cQFrI)}AC+y2nR!Wpgdm2kSt00nsQzC0?0H-aZ z7)F4a6l#gIbhxszvxF$&6_eey#nfVI5t+>f-!`>aTGwCPRs%(uMe@?V@*=#5QK+y6 zk)jxAS!-KacM-jaT_~;UD=I83U>3mqEL>M88>l~5TSzP96v)nW6cpsM^2uX3IKzno zS;@un-U4a?FQ05V2ay#F2&zD-%!qth-9STqJ|mxO z3~7FUMQ&~mI|m}XK$0sT=xxZQ<#Kc6W&MTO+-y>!4j(+8BP+OcJ|99Y_}OG#UpA4= z&VmV35M;}1E}zS0!a2k=Wno#|ObAQ>ewMuDQhpX4&gFe)GBbD?tgic1nLiM(kb1M&8c^ZEvW zNA`4qgO)t42f3YOl9Fr95>1Ibd$>`eE`cC6#Ou9Yg?MEnRfx9@@rrKV%tO4U3;Dn! zkF!wXr{1fVQGNZxAb~dloU0dlQK{UEU5FKhSS`1j5UcufBVxrPR^9Ci#A?4%4t2>h zDt!>M^G+6Gj$Fxsn;Xq4tgT$H4}9{{WyCJHHVnSm-!clS&K4#3I@+is(cK;-I&-ZJ zysN+DRW#Qe3tO=!X}^~(%=c5s`)8H)U9SP}=KJij#-_X&uceWt{THw2`P(m0mMD>E zNM4rTyp|)8WEG!nuS)iF4LsI;{_@Rx4?b^*aI$5?-f`r;3{+&y&p#_fBbKmGQfZ{EIseKXE} zIn3rmQUtiS=Q$N^0}ZJGZh=SI`z~C)b?4z1&tJZJ_v4SRU%maJ#BGH$3ze*uuqDs3 zOPhzfayPiI*xuMRbm{u7dyk)d`~8n^UjO*|&G&uB*ZPL|S!%(JD0097{3^4!VPL2( z!Fy>$VSC^3l^eGneE!w9KfM1D`|+38Z*CQD6NLnOSQuzA;dRm^U*^~MUg|g*1#7hM}Vf1Fv$T*?5VzTS>P#5@J#D*q_1wOdUTd?Px(6>_xdC_g)r%kA{`lt2 z`-j!LgVwEIVL4eF&DedN5#r&ORB^WJ{KYHK`j^jN{_y7Qn|I%J9*GR!8tgP(7aE^T zd9e`|vch7=YquUgdiKpf-$Ic$*YaaSqocg%>T9r|8~J_hxt?KY=JT&!yn6fg-Mc6C z$>CdLf@}?SAd5ET)xg!;@O=OABZ&Svn7(;q{Cc;Udg?UTz31(NXD?o(EAEu+joKOO zH{U>$1)HAy1V!I})0(z%$L@&5Mmpdook#ig^`)%%n4}o*S$Y~Ym^S`hynj!y)g)~; z>|Y{vhUbJsWt=*gp*r}yvAKCqv^U!Ih8Fo}@_p$q_+`$vNX#x7zfB!>sVZdrcn z$wRx?yXZT~G|5idPFg&<8KEb>Yjo$H@}yj}a*7#Go;(px#4~n~+b4lcu`?cvm!%dS zI~>o5XYU}h5AVQsu(t#50Z{IcrAzYSchG?$-zM)8O;Ms#I{T0c2<ywd(_`o{+-UjE!X)rWLTC@bWwbo5Dj}L0 z%@@m)i{cU68L{E4GFq%DmLD&SM$9lJ6N1>pTHs*J0!JsgMGL{BIHC3=Di>FBOsp=3 z3~z{+TbqJ-Nu|k%w+iug*VXS()>jq}JaQ8q{(9v(HC0fbJijDbtRYsIB34aVE-JRY zA`|{ocq7)SGsh7trSd3Zg(6mJLlR=8S0zDZ^60~kh?&v24KYir-;rksa8M%i-ZS7aX)+pE0-DXWiMOyYik(WvJwcEwwZTr%T(#0;;%MN}* z(i)LDZ089`dJf&-%iQ|Ydykjo?-N-&ZT=II4z9I;jqL&XD&S^bMDMGuEkC)=-hOq# ze?sZi^``dPs8}Z4-t!%)1$6~G+!ii9Fab&*=U6VXW}uP>#cc7jxa92ms>1;bmux=! 
literal 0
HcmV?d00001

diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py
new file mode 100644
index 00000000..55c9853e
--- /dev/null
+++ b/calibre-plugin/jobs.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Jim Miller'
+__copyright__ = '2011, Grant Drake '
+__docformat__ = 'restructuredtext en'
+
+import time, os, traceback
+
+from ConfigParser import SafeConfigParser
+from StringIO import StringIO
+
+from calibre.utils.ipc.server import Server
+from calibre.utils.ipc.job import ParallelJob
+from calibre.utils.logging import Log
+
+from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
+    OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
+from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
+from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data
+
+# ------------------------------------------------------------------------------
+#
+#              Functions to perform downloads using worker jobs
+#
+# ------------------------------------------------------------------------------
+
+def do_download_worker(book_list, options,
+                       cpus, notification=lambda x,y:x):
+    '''
+    Master job: launch a child job to download each book in the set.
+    This is run as a worker job in the background to keep the UI
+    responsive and to work around memory leak issues, since each book
+    is handled by its own short-lived worker process.
+    '''
+    server = Server(pool_size=cpus)
+
+    print(options['version'])
+    total = 0
+    # Queue all the jobs
+    print("Adding jobs for URLs:")
+    for book in book_list:
+        if book['good']:
+            print("%s"%book['url'])
+            total += 1
+            args = ['calibre_plugins.fanfictiondownloader_plugin.jobs',
+                    'do_download_for_worker',
+                    (book,options)]
+            job = ParallelJob('arbitrary',
+                              "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
+                              done=None,
+                              args=args)
+            job._book = book
+            # job._book_id = book_id
+            # job._title = title
+            # job._modified_date = modified_date
+            # job._existing_isbn = existing_isbn
+            server.add_job(job)
+
+    # This server is an arbitrary_n job, so there is a notifier available.
+    # Set the % complete to a small number to avoid the 'unavailable' indicator
+    notification(0.01, 'Downloading FanFiction Stories')
+
+    # dequeue the job results as they arrive, saving the results
+    count = 0
+    while True:
+        job = server.changed_jobs_queue.get()
+        # A job can 'change' when it is not finished, for example if it
+        # produces a notification.  Ignore these.
+        job.update()
+        if not job.is_finished:
+            continue
+        # A job really finished.  Get the information.
+        output_book = job.result
+        #print("output_book:%s"%output_book)
+        book_list.remove(job._book)
+        book_list.append(job.result)
+        book_id = job._book['calibre_id']
+        #title = job._title
+        count = count + 1
+        notification(float(count)/total, 'Downloaded Story')
+        # Add this job's output to the current log
+        print('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
+        print(job.details)
+
+        if count >= total:
+            # All done!
+            break
+
+    server.close()
+
+    # return the book list as the job result
+    return book_list
+
+def do_download_for_worker(book,options):
+    '''
+    Child job: download (or update) the story for one specific book,
+    when run as a worker job.
+    '''
+    try:
+        book['comment'] = 'Download started...'
+
+        ffdlconfig = SafeConfigParser()
+        ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
+        ffdlconfig.readfp(StringIO(options['personal.ini']))
+
+        adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
+        adapter.is_adult = book['is_adult']
+        adapter.username = book['username']
+        adapter.password = book['password']
+
+        story = adapter.getStoryMetadataOnly()
+        writer = writers.getWriter(options['fileform'],adapter.config,adapter)
+
+        outfile = book['outfile']
+
+        ## No need to download at all.  Shouldn't ever get down here.
+        if options['collision'] == CALIBREONLY:
+            print("Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening...")
+            book['comment'] = 'Metadata collected.'
+
+        ## checks were done earlier; it's new, not a dup, or newer--just write it.
+        elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
+                ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)):
+            print("write to %s"%outfile)
+            writer.writeStory(outfilename=outfile, forceOverwrite=True)
+            book['comment'] = 'Download %s completed, %s chapters.'%(options['fileform'],story.getMetadata("numChapters"))
+
+        ## checks were done earlier, just update it.
+        elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
+
+            # update now handled by pre-populating the old images and
+            # chapters in the adapter rather than merging epubs.
+            urlchaptercount = int(story.getMetadata('numChapters'))
+            (url,chaptercount,
+             adapter.oldchapters,
+             adapter.oldimgs) = get_update_data(book['epub_for_update'])
+
+            print("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount))
+            print("write to %s"%outfile)
+
+            writer.writeStory(outfilename=outfile, forceOverwrite=True)
+
+            book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\
+                (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
+
+    except NotGoingToDownload as d:
+        book['good']=False
+        book['comment']=unicode(d)
+        book['icon'] = d.icon
+
+    except Exception as e:
+        book['good']=False
+        book['comment']=unicode(e)
+        book['icon']='dialog_error.png'
+        print("Exception: %s:%s"%(book,unicode(e)))
+        traceback.print_exc()
+
+    #time.sleep(10)
+    return book
diff --git a/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt b/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/cron.yaml b/cron.yaml
new file mode 100644
index 00000000..e72999f4
--- /dev/null
+++ b/cron.yaml
@@ -0,0 +1,10 @@
+cron:
+- description: cleanup job
+  url: /r3m0v3r
+  schedule: every 2 hours
+
+# There's a bug in the Python 2.7 runtime that prevents this from
+# working properly.  In theory, there should never be orphans anyway.
+#- description: orphan cleanup job
+#  url: /r3m0v3rOrphans
+#  schedule: every 4 hours
diff --git a/css/index.css b/css/index.css
new file mode 100644
index 00000000..eae546b7
--- /dev/null
+++ b/css/index.css
@@ -0,0 +1,73 @@
+body
+{
+    font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif;
+}
+
+#main
+{
+    width: 60%;
+    margin-left: 20%;
+    background-color: #dae6ff;
+    padding: 2em;
+}
+
+#greeting
+{
+    /* margin-bottom: 1em; */
+    border-color: #efefef;
+}
+
+#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover
+{
+    border: thin solid #fffeff;
+}
+
+h1
+{
+    text-decoration: none;
+}
+
+#logpasswordtable
+{
+    padding: 1em;
+}
+
+#logpassword, #logpasswordtable {
+    /* display: none; */
+}
+
+#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile
+{
+    margin: 1em;
+    padding: 1em;
+    border: thin dotted #fffeff;
+}
+
+div.field
+{
+    margin-bottom: 0.5em;
+}
+
+#submitbtn
+{
+    padding: 1em;
+}
+
+#typelabel
+{
+}
+
+#typeoptions
+{
+    margin-top: 0.5em;
+}
+
+#error
+{
+    color: #f00;
+}
+.recent {
+    font-size: large;
+}
diff --git a/defaults.ini b/defaults.ini
new file mode 100644
index 00000000..858f6875
--- /dev/null
+++ b/defaults.ini
@@ -0,0 +1,508 @@
+# Copyright 2012 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[defaults]
+
+## [defaults] section applies to all formats and sites but may be
+## overridden at several levels
+
+## All available titlepage_entries and the label used for them:
+## <entry>_label: <label>
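(The defaults.ini hunk breaks off at this point in the extracted patch. As a hedged editorial sketch, the entry/label pattern it introduces is used roughly as below; the entry names are illustrative assumptions, not text recovered from the patch.)

## Sketch only: pick which metadata entries appear on the title page,
## then set the label shown for each chosen entry.
#titlepage_entries: category,genre,status,datePublished,numChapters
#category_label:Category
#genre_label:Genre
#status_label:Status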
[Extraction damage: the remainder of the defaults.ini hunk and the diffs that added the App Engine HTML templates (the main download page and the "Edit Config" page) lost all angle-bracket markup. Only scattered template text survives: a "FanFictionDownLoader" heading, an "Edit Config" form reading "Editing configuration for {{ nickname }}", a "Default System configuration" view that prints {{ defaultsini }}, and a footer with "Powered by Google App Engine", "This is a web front-end to FanFictionDownLoader", and "Copyright © Fanficdownloader team". The unrecoverable hunks are omitted.]
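For orientation, here is a hedged sketch of how a 2012-era App Engine handler would typically fill {{ nickname }} and {{ defaultsini }} placeholders like the ones above, using the runtime's bundled Django-style template support. The file name 'editconfig.html', the function name, and all values are illustrative assumptions, not code recovered from this patch:

    # Sketch only: render the "Edit Config" page.  render_editconfig and
    # 'editconfig.html' are invented names; users.get_current_user() and
    # template.render() are the standard App Engine calls of that era.
    import os
    from google.appengine.api import users
    from google.appengine.ext.webapp import template

    def render_editconfig(personal_ini):
        # Fill the template placeholders with the signed-in user's
        # nickname, their personal.ini text, and the system defaults.
        user = users.get_current_user()
        path = os.path.join(os.path.dirname(__file__), 'editconfig.html')
        return template.render(path, {
            'nickname': user.nickname() if user else 'anonymous',
            'config': personal_ini,
            'defaultsini': open('defaults.ini').read(),
        })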
diff --git a/epubmerge.py b/epubmerge.py
new file mode 100644
index 00000000..f7e76b8c
--- /dev/null
+++ b/epubmerge.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# epubmerge.py 1.0
+
+# Copyright 2011, Jim Miller
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if __name__ == "__main__":
+    print('''
+This utility has been split out into its own project.
+See: http://code.google.com/p/epubmerge/
+...for a CLI epubmerge.py program and calibre plugin.
+''')
diff --git a/example.ini b/example.ini
new file mode 100644
index 00000000..67392708
--- /dev/null
+++ b/example.ini
@@ -0,0 +1,40 @@
+## This is an example of what your personal configuration might look
+## like.
+
+[defaults]
+## Some sites also require the user to confirm they are adult for
+## adult content.  In the commandline version, this should go in your
+## personal.ini, not defaults.ini.
+#is_adult:true
+
+## Most commonly, I expect, this will be used to save usernames and
+## passwords for different sites.
+[www.twilighted.net]
+#username:YourPenname
+#password:YourPassword
+
+[www.ficwad.com]
+#username:YourUsername
+#password:YourPassword
+
+[www.adastrafanfic.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content.
+#is_adult:true
+
+## The [defaults] section here will override the system [defaults],
+## but not format, site, or site:format sections.
+[defaults]
+## Directories are only useful in commandline or zip files.
+#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
+#output_filename: books/${site}/${authorId}/${title}-${storyId}${formatext}
+
+## For example, zip_output here will turn on zip for html and txt, but
+## not epub, because the system [epub] section explicitly says
+## zip_output: false (epubs *are* specially formatted zip files.)
+#zip_output: true
+#zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
+
+## This section will override anything in the system defaults or other
+## sections here.
+[overrides]
diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py
new file mode 100644
index 00000000..4b17b853
--- /dev/null
+++ b/fanficdownloader/BeautifulSoup.py
@@ -0,0 +1,2014 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses a (possibly invalid) XML or HTML document into a
+tree representation. It provides methods and Pythonic idioms that make
+it easy to navigate, search, and modify the tree.
+
+A well-formed XML/HTML document yields a well-formed data
+structure. An ill-formed XML/HTML document yields a correspondingly
+ill-formed data structure. If your document is only locally
+well-formed, you can use this library to find and process the
+well-formed part of it.
+
+Beautiful Soup works with Python 2.2 and up.
It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
+ +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." 
+ lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, 
name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. + r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
+    def nextGenerator(self):
+        i = self
+        while i is not None:
+            i = i.next
+            yield i
+
+    def nextSiblingGenerator(self):
+        i = self
+        while i is not None:
+            i = i.nextSibling
+            yield i
+
+    def previousGenerator(self):
+        i = self
+        while i is not None:
+            i = i.previous
+            yield i
+
+    def previousSiblingGenerator(self):
+        i = self
+        while i is not None:
+            i = i.previousSibling
+            yield i
+
+    def parentGenerator(self):
+        i = self
+        while i is not None:
+            i = i.parent
+            yield i
+
+    # Utility methods
+    def substituteEncoding(self, str, encoding=None):
+        encoding = encoding or "utf-8"
+        return str.replace("%SOUP-ENCODING%", encoding)
+
+    def toEncoding(self, s, encoding=None):
+        """Encodes an object to a string in some encoding, or to Unicode.
+        ."""
+        if isinstance(s, unicode):
+            if encoding:
+                s = s.encode(encoding)
+        elif isinstance(s, str):
+            if encoding:
+                s = s.encode(encoding)
+            else:
+                s = unicode(s)
+        else:
+            if encoding:
+                s = self.toEncoding(str(s), encoding)
+            else:
+                s = unicode(s)
+        return s
+
+class NavigableString(unicode, PageElement):
+
+    def __new__(cls, value):
+        """Create a new NavigableString.
+
+        When unpickling a NavigableString, this method is called with
+        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
+        passed in to the superclass's __new__ or the superclass won't know
+        how to handle non-ASCII characters.
+        """
+        if isinstance(value, unicode):
+            return unicode.__new__(cls, value)
+        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+
+    def __getnewargs__(self):
+        return (NavigableString.__str__(self),)
+
+    def __getattr__(self, attr):
+        """text.string gives you text. This is for backwards
+        compatibility for Navigable*String, but for CData* it lets you
+        get the string without the CData wrapper."""
+        if attr == 'string':
+            return self
+        else:
+            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+
+    def __unicode__(self):
+        return str(self).decode(DEFAULT_OUTPUT_ENCODING)
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        if encoding:
+            return self.encode(encoding)
+        else:
+            return self
+
+class CData(NavigableString):
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
+
+class ProcessingInstruction(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        output = self
+        if "%SOUP-ENCODING%" in output:
+            output = self.substituteEncoding(output, encoding)
+        return "<?%s?>" % self.toEncoding(output, encoding)
+
+class Comment(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<!--%s-->" % NavigableString.__str__(self, encoding)
+
+class Declaration(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<!%s>" % NavigableString.__str__(self, encoding)
+
+class Tag(PageElement):
+
+    """Represents a found HTML tag with its attributes and contents."""
+
+    def _invert(h):
+        "Cheap function to invert a hash."
+        i = {}
+        for k,v in h.items():
+            i[v] = k
+        return i
+
+    XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
+                                      "quot" : '"',
+                                      "amp" : "&",
+                                      "lt" : "<",
+                                      "gt" : ">" }
+
+    XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
+
+    def _convertEntities(self, match):
+        """Used in a call to re.sub to replace HTML, XML, and numeric
+        entities with the appropriate Unicode characters.
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. + convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. 
This
+                    #   can't happen naturally, but it can happen if
+                    #   you modify an attribute value after parsing
+                    #   the document.  Now we have a bit of a
+                    #   problem.  We solve it by enclosing the
+                    #   attribute in single quotes, and escaping any
+                    #   embedded single quotes to XML entities.
+                    if '"' in val:
+                        fmt = "%s='%s'"
+                        if "'" in val:
+                            # TODO: replace with apos when
+                            # appropriate.
+                            val = val.replace("'", "&squot;")
+
+                    # Now we're okay w/r/t quotes. But the attribute
+                    # value might also contain angle brackets, or
+                    # ampersands that aren't part of entities. We need
+                    # to escape those to XML entities too.
+                    val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing:
+            close = ' /'
+        else:
+            closeTag = '</%s>' % encodedName
+
+        indentTag, indentContents = 0, 0
+        if prettyPrint:
+            indentTag = indentLevel
+            space = (' ' * (indentTag-1))
+            indentContents = indentTag + 1
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if prettyPrint:
+                s.append(space)
+            s.append('<%s%s%s>' % (encodedName, attributeString, close))
+            if prettyPrint:
+                s.append("\n")
+            s.append(contents)
+            if prettyPrint and contents and contents[-1] != "\n":
+                s.append("\n")
+            if prettyPrint and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if prettyPrint and closeTag and self.nextSibling:
+                s.append("\n")
+        s = ''.join(s)
+        return s
+
+    def decompose(self):
+        """Recursively destroys the contents of this tree."""
+        self.extract()
+        if len(self.contents) == 0:
+            return
+        current = self.contents[0]
+        while current is not None:
+            next = current.next
+            if isinstance(current, Tag):
+                del current.contents[:]
+            current.parent = None
+            current.previous = None
+            current.previousSibling = None
+            current.next = None
+            current.nextSibling = None
+            current = next
+
+    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return self.__str__(encoding, True)
+
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Renders the contents of this tag as a string in the given
+        encoding. If encoding is None, returns a Unicode string.."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.__str__(encoding)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
+            if text and prettyPrint:
+                text = text.strip()
+            if text:
+                if prettyPrint:
+                    s.append(" " * (indentLevel-1))
+                s.append(text)
+                if prettyPrint:
+                    s.append("\n")
+        return ''.join(s)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this Tag matching the given
+        criteria."""
+        r = None
+        l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findChild = find
+
+    def findAll(self, name=None, attrs={}, recursive=True, text=None,
+                limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria.  You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'.
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst is True:
+            result = markup is not None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isinstance(markup, basestring):
+                markup = unicode(markup)
+            #Now we know that chunk is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif hasattr(matchAgainst, '__iter__'): # list-like
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isinstance(markup, basestring):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source):
+        list.__init__([])
+        self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+    """Turns a list of maps, lists, or scalars into a single map.
+    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            #It's a map. Merge it.
+            for k,v in portion.items():
+                built[k] = v
+        elif hasattr(portion, '__iter__'): # is a list
+            #It's a list. Map each item to the default.
+            for k in portion:
+                built[k] = default
+        else:
+            #It's a scalar. Map it to the default.
+            built[portion] = default
+    return built
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+    """This class contains the basic parser and search code. It defines
+    a parser that knows nothing about tag behavior except for the
+    following:
+
+      You can't close a tag without closing all the tags it encloses.
+      That is, "<foo><bar></foo>" actually means
+      "<foo><bar></bar></foo>".
+
+      [Another possible explanation is "<foo><bar /></foo>", but since
+      this class defines no SELF_CLOSING_TAGS, it will never use that
+      explanation.]
+
+    This class is useful for parsing XML or made-up markup languages,
+    or when BeautifulSoup makes an assumption counter to what you were
+    expecting."""
+
+    SELF_CLOSING_TAGS = {}
+    NESTABLE_TAGS = {}
+    RESET_NESTING_TAGS = {}
+    QUOTE_TAGS = {}
+    PRESERVE_WHITESPACE_TAGS = []
+
+    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda x: x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda x: '<!' + x.group(1) + '>')
+                      ]
+
+    ROOT_TAG_NAME = u'[document]'
+
+    HTML_ENTITIES = "html"
+    XML_ENTITIES = "xml"
+    XHTML_ENTITIES = "xhtml"
+    # TODO: This only exists for backwards-compatibility
+    ALL_ENTITIES = XHTML_ENTITIES
+
+    # Used when determining whether a text node is all whitespace and
+    # can be replaced with a single space. A text node that contains
+    # fancy Unicode spaces (usually non-breaking) should be left
+    # alone.
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
      (No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. 
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

+         <p>Foo<b>Bar *<p>
+         *<p> should pop to 'p', not 'b'.
+
+         <p>Foo<table>Bar *<p>
+         *<p> should pop to 'table', not 'p'.
+
+         <p>Foo<table><tr>Bar *<p>
+         *<p> should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>
+         *<li> should pop to 'ul', not the first 'li'.
+
+         <tr><table><tr> *<tr>
+         *<tr> should pop to 'table', not the first 'tr'.
+
+         <td><tr><td> *<td>
      ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." 
+ if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

+      <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurance
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+       but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table></tr><tr>
      Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+ # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + + + + + + + + +
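A minimal usage sketch (not part of the patch itself; Python 2, matching the codebase) of the parser flavors and search criteria described in the docstrings above. The HTML fragments and variable names here are illustrative assumptions:

    import re
    from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
        ICantBelieveItsBeautifulSoup, MinimalSoup

    # The nested-<b> fragment discussed above: BeautifulSoup assumes the
    # author forgot to close the first <b>, ICantBelieveItsBeautifulSoup
    # takes the nesting literally, MinimalSoup assumes almost nothing.
    fragment = "<b>Foo<b>Bar</b></b>"
    for cls in (BeautifulSoup, ICantBelieveItsBeautifulSoup, MinimalSoup):
        print cls.__name__, cls(fragment).prettify()

    # findAll criteria, per the findAll docstring earlier: strings,
    # lists, regular expressions and callables all work as matchers.
    soup = BeautifulSoup("<b class='one'>Foo</b><b class='two'>Bar</b>")
    soup.findAll('b')                             # by tag name
    soup.findAll('b', {'class': 'one'})           # attribute equals a string
    soup.findAll('b', {'class': ['one', 'two']})  # any value from a list
    soup.findAll(text=re.compile('Ba'))           # regexp against text nodes
    soup.find('b', {'class': lambda v: v and v.startswith('t')})  # callable

    # convertEntities, as handled by handle_entityref above:
    print BeautifulStoneSoup("Sacr&eacute; bleu!",
                             convertEntities=BeautifulStoneSoup.HTML_ENTITIES)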
      +

      + FanFictionDownLoader +

      + + +
      +
      + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
      + +
      + Ebook format   +
      + +
      + +
      + + + +
      + + + +
      +
      + +

      + Login and Password +

      +
+ If the story requires a login and password to download (e.g. it is marked as Mature on FFA), you may need to provide your credentials to download it; otherwise just leave these fields empty. +
      +
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      +
      + + +
      + + +
      + +
      +
+ A few things to know which will make your life substantially easier:
+ 1. Small post written by me — how to read fiction in Stanza or any other ebook reader.
+ 2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+ 3. Paste a URL of the first chapter of the fanfic, not the index page.
+ 4. Fics with a single chapter are not supported (you can just copy and paste the text).
+ 5. Stories which are too long may not be downloaded correctly, and the application will report a time-out error — this is a limitation currently imposed by Google AppEngine on long-running activities.
+ 6. FicWad support is somewhat flaky — if you feel it doesn't work for you, send all the details to me.
+ 7. You can download fanfics and store them for later by just downloading them and visiting the recent downloads section, but in future they will be deleted after 5 days to save space.
+ 8. If the Downloader simply opens a file download window rather than saving the fanfic and giving you a link, the story is too large to save in the database and you need to download it straight away.
+ 9. If you think that something that should work in fact doesn't, drop me a mail at sigizmund@gmail.com.
      + Otherwise, just have fun, and if you want to say thank you — use the email above. +
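The form above appears to be driven by js/fdownloader.js (added later in this patch): it POSTs the URL, credentials and format to /submitDownload, receives an opaque key, then polls /progress once a second, up to COUNTER_MAX (50) tries, until the JSON result is no longer "Nope". A hedged sketch of the same flow from Python; the endpoint names, the "Nope"/"OK" convention and the field names come from that file, while the host and the 'epub' format value are assumptions for illustration:

    import time
    import urllib
    import simplejson   # assumption: the usual JSON library of the period

    BASE = 'http://localhost:8080'   # hypothetical dev_appserver host

    key = urllib.urlopen(BASE + '/submitDownload', urllib.urlencode(
        {'url': 'http://www.fanfiction.net/s/2345466/3/',
         'login': '', 'password': '', 'format': 'epub'})).read()

    for attempt in range(50):
        reply = urllib.urlopen(
            BASE + '/progress?' + urllib.urlencode({'key': key})).read()
        data = simplejson.loads(reply)
        if data['result'] != 'Nope':   # "Nope" means "still working"
            print data['result']       # "OK", or an error message
            break
        time.sleep(1)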
      +
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © Fanficdownloader team +
      + +
      + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..43ec8f2b --- /dev/null +++ b/index.html @@ -0,0 +1,352 @@ + + + + + FanFictionDownLoader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + + + + + +
      +

      + FanFictionDownLoader +

      + +
      + + +
      + + {{yourfile}} + + + {% if authorized %} +
      +
      +
      +

      Hi, {{ nickname }}! This is FanFictionDownLoader, which makes reading stories from various websites + much easier.

      +
      + +

      New Feature Added

      +

+ You can now see a list of fanfics downloaded by all + users, by most popular + or most recent. +

      +

      + Questions? Check out our + FAQs. +

      +

      + If you have any problems with this application, please + report them in + the FanFictionDownLoader Google Group. The + Previous Version is also available for you to use if necessary. +

      +
      + {{ error_message }} +
      +
      + +
      +
      URL:
      +
      +
      Ebook format
      +
      + EPub + HTML + Plain Text + Mobi(Kindle) +
      +
      +
      + +

      For most readers, including Sony Reader, Nook and iPad, use EPub.

      +
      +
      +
      +

      + Customize your User Configuration. +

      +

      + Or see your personal list of previously downloaded fanfics. +

      +

+ See a list of fanfics downloaded by all users, by most popular or most recent. +

      +
      + + {% else %} +
      +
      +

+ This is FanFictionDownLoader, which makes reading stories from various websites much easier. Before you + can start downloading fanfics, you need to log in, so FanFictionDownLoader can remember your fanfics and store them. +

      +

      Login using Google account

      +
      +
      + {% endif %} + +
      +

      + FanFictionDownLoader calibre Plugin +

      + + There's now a version of this downloader that runs + entirely inside the + popular calibre + ebook management package as a plugin. + +

      + + Once you have calibre installed and running, inside + calibre, you can go to 'Get plugins to enhance calibre' or + 'Get new plugins' and + install FanFictionDownLoader. + +

      +
      +
      +

      Supported sites:

      +

+ There's a + Supported + Sites page in our wiki. If you have a site you'd like + to see supported, please check there first. A small URL-recognition sketch follows the list below. +

      +
      +
      fictionalley.org
      +
      + Use the URL of the story's chapter list, such as +
      http://www.fictionalley.org/authors/drt/DA.html. +
      Or a chapter URL (or one-shot text), such as +
      http://www.fictionalley.org/authors/drt/JOTP01a.html. +
      Both will work for both chaptered and one-shot stories now. +
      +
      fanfiction.net
      +
      + Use the URL of any story chapter, with or without story title such as +
      http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
      http://www.fanfiction.net/s/2345466/3/. +
      +
      fictionpress.com
      +
      + Use the URL of any story chapter, such as +
      http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
      http://www.fictionpress.com/s/2847338/6/. +
      +
      twilighted.net
      +
      + Use the URL of the start of the story, such as +
      http://twilighted.net/viewstory.php?sid=8422. +
      +
      twiwrite.net
      +
      + Use the URL of the start of the story, such as +
      http://twiwrite.net/viewstory.php?sid=427. +
      +
      ficwad.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.ficwad.com/story/74884. +
      Note that this is changed from the previous version. The system will still accept chapter URLs, however. +
      +
      harrypotterfanfiction.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
      +
      potionsandsnitches.net
      +
      + Use the URL of the story's chapter list, such as +
      http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
      +
      mediaminer.org
      +
      + Use the URL of the story's chapter list, such as +
      http://www.mediaminer.org/fanfic/view_st.php/166653. +
      Or the story URL for one-shots, such as +
      http://www.mediaminer.org/fanfic/view_st.php/167618 or +
      http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c +
      +
      adastrafanfic.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.adastrafanfic.com/viewstory.php?sid=854. +
      +
      whofic.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.whofic.com/viewstory.php?sid=16334. +
      +
      thewriterscoffeeshop.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110. +
      +
      fanfiction.tenhawkpresents.com
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.tenhawkpresents.com/viewstory.php?sid=294. +
      +
      castlefans.org
      +
      + Use the URL of the story's chapter list, such as +
      http://castlefans.org/fanfic/viewstory.php?sid=123. +
      +
      fimfiction.net
      +
      + Use the URL of the story's chapter list, such as +
      http://www.fimfiction.com/story/123/ +
      or the URL of any chapter, such as +
      http://www.fimfiction.com/story/123/1/. +
      +
      tthfanfic.org
      +
      + Use the URL of any story, with or without chapter, title and notice, such as +
      http://www.tthfanfic.org/Story-5583 +
      http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm. +
      http://www.tthfanfic.org/T-99999999/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm. +
      +
      www.siye.co.uk
      +
      + Use the URL of the story's chapter list, such as +
      http://www.siye.co.uk/siye/viewstory.php?sid=123. +
      +
      archiveofourown.org
      +
+ Use the URL of the story, or one of its chapters, such as +
      http://archiveofourown.org/works/76366. +
      http://archiveofourown.org/works/76366/chapters/101584. +
      +
      ficbook.net(Russian)
      +
+ Use the URL of the story, or one of its chapters, such as +
      http://ficbook.net/readfic/93626. +
      http://ficbook.net/readfic/93626/246417#part_content. +
      +
      fanfiction.mugglenet.com
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.mugglenet.com/viewstory.php?sid=123. +
      +
      www.hpfandom.net
      +
      + Use the URL of the story's chapter list, such as +
      http://www.hpfandom.net/eff/viewstory.php?sid=123. +
      +
      thequidditchpitch.org
      +
      + Use the URL of the story's chapter list, such as +
      http://thequidditchpitch.org/viewstory.php?sid=123. +
      +
      fanfiction.portkey.org
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.portkey.org/story/123. +
      +
      nfacommunity.com
      +
      + Use the URL of the story's chapter list, such as +
      http://nfacommunity.com/viewstory.php?sid=1654. +
      +
      www.midnightwhispers.ca
      +
      + Use the URL of the story's chapter list, such as +
      http://www.midnightwhispers.ca/viewstory.php?sid=1124. +
      +
      ksarchive.com
      +
      + Use the URL of the story's chapter list, such as +
      http://ksarchive.com/viewstory.php?sid=1124. +
      +
      archive.skyehawke.com
      +
      + Use the URL of the story's summary, such as +
      http://archive.skyehawke.com/story.php?no=17466. +
      +
      +
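The chapter-list and chapter URL shapes above are what the downloader dispatches on. A hedged, illustrative sketch of that idea; the helper name and patterns below are invented for this example, not taken from the patch:

    import re

    SITE_PATTERNS = [
        ('fanfiction.net',      re.compile(r'fanfiction\.net/s/\d+')),
        ('fictionpress.com',    re.compile(r'fictionpress\.com/s/\d+')),
        ('ficwad.com',          re.compile(r'ficwad\.com/story/\d+')),
        ('twilighted.net',      re.compile(r'twilighted\.net/viewstory\.php\?sid=\d+')),
        ('archiveofourown.org', re.compile(r'archiveofourown\.org/works/\d+')),
    ]

    def identify_site(url):
        """Return the archive a story URL belongs to, or None."""
        for name, pattern in SITE_PATTERNS:
            if pattern.search(url):
                return name
        return None

    print identify_site('http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo')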

      + A few additional things to know, which will make your life substantially easier: +

      +
        +
+ 1. First thing to know: we do not use your Google login and password. In fact, all we know about you is your ID – the password is verified by Google and is absolutely, totally unknown to anyone but you.
+ 2. Small post written by Roman — how to read fiction in Stanza or any other ebook reader.
+ 3. You can download fanfiction directly from your iPhone, Kindle or (possibly) other ebook reader.
+ 4. Downloaded stories are deleted after some time (which should give you enough time to download them and will keep Google happy about the app not going over the storage limit).
+ 5. If you see some funny characters in a downloaded Plain Text file, make sure you choose the text file encoding UTF-8 and not something else.
+ 6. If you think that something that should work in fact doesn't, post a message to our Google Group. We also encourage you to join it so you will find out about the latest updates and fixes as soon as possible.
      + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
      +
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © FanFictionDownLoader team +
      + +
      + + +
      +
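The index.yaml added below declares the composite datastore indexes that this app's queries require. A hedged sketch of queries of exactly that shape; the model names come from the yaml, but the GQL itself is illustrative and not taken from this patch:

    # Runs inside a request handler on Google App Engine.
    from google.appengine.api import users
    from google.appengine.ext import db

    me = users.get_current_user()

    # Needs the (user, -date) composite index on DownloadMeta:
    recent = db.GqlQuery("SELECT * FROM DownloadMeta "
                         "WHERE user = :1 ORDER BY date DESC", me)

    # Needs the (cleared, date) composite index on DownloadedFanfic:
    stale = db.GqlQuery("SELECT * FROM DownloadedFanfic "
                        "WHERE cleared = :1 ORDER BY date", False)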
      + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..16bcaefe --- /dev/null +++ b/index.yaml @@ -0,0 +1,33 @@ +indexes: + +# AUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + +- kind: DownloadedFanfic + properties: + - name: cleared + - name: date + +- kind: DownloadedFanfic + properties: + - name: user + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
      " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+ all :
+ front + ">";
+ });
+
+ // Trim whitespace, otherwise indexOf won't work as expected
+ var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase();
+
+ var wrap =
+ // option or optgroup
+ !tags.indexOf("<opt") &&
+ [ 1, "<select multiple='multiple'>", "</select>" ] ||
+
+ !tags.indexOf("<leg") &&
+ [ 1, "<fieldset>", "</fieldset>" ] ||
+
+ tags.match(/^<(thead|tbody|tfoot|colg|cap)/) &&
+ [ 1, "<table>", "</table>" ] ||
+
+ !tags.indexOf("<tr") &&
+ [ 2, "<table><tbody>", "</tbody></table>" ] ||
+
+ // <thead> matched above
+ (!tags.indexOf("<td") || !tags.indexOf("<th")) &&
+ [ 3, "<table><tbody><tr>", "</tr></tbody></table>" ] ||
+
+ !tags.indexOf("<col") &&
+ [ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ] ||
+
+ // IE can't serialize <link> and <script> tags normally
+ !jQuery.support.htmlSerialize &&
+ [ 1, "div<div>", "</div>" ] ||
+
+ [ 0, "", "" ];
+<html>
+<head>
+<title>FanFictionDownLoader</title>
+</head>
+<body>
+
+{% if fic.failure %}
+<p>{{ fic.failure }}</p>
+{% endif %}
+
+<form action="/fdown" method="post">
+<input type="hidden" name="url" value="{{ url }}">
+<input type="hidden" name="format" value="{{ format }}">
+
+{% if is_login %}
+
+<h3>Login and Password</h3>
+
+<p>
+{{ site }} requires a Login/Password for this story.
+You need to provide your Login/Password for {{ site }}
+to download it.
+</p>
+
+<p>
+Login
+<input type="text" name="login">
+</p>
+
+<p>
+Password
+<input type="password" name="password">
+</p>
+
+{% else %}
+
+<input type="hidden" name="login" value="{{ login }}">
+<input type="hidden" name="password" value="{{ password }}">
+
+<p>
+Are you an Adult?
+<input type="checkbox" name="is_adult">
+</p>
+
+{% endif %}
+
+<input type="submit" value="Download">
+
+</form>
+
+<p>
+Powered by Google App Engine
+</p>
+
+<p>
+This is a web front-end to FanFictionDownLoader<br>
+Copyright © FanFictionDownLoader team
+</p>
      + + diff --git a/main.py b/main.py new file mode 100644 index 00000000..d6884ddc --- /dev/null +++ b/main.py @@ -0,0 +1,621 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logging.getLogger().setLevel(logging.DEBUG) + +import os +from os.path import dirname, basename, normpath +import re +import sys +import zlib +import urllib +import datetime + +import traceback +from StringIO import StringIO +import ConfigParser + +## Just to shut up the appengine warning about "You are using the +## default Django version (0.96). The default Django version will +## change in an App Engine release in the near future. Please call +## use_library() to explicitly select a Django version. For more +## information see +## http://code.google.com/appengine/docs/python/tools/libraries.html#Django" +## Note that if you are using the SDK App Engine Launcher and hit an SDK +## Console page first, you will get a django version mismatch error when you +## to go hit one of the application pages. Just change a file again, and +## make sure to hit an app page before the SDK page to clear it. +#os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' +#from google.appengine.dist import use_library +#use_library('django', '1.2') + +from google.appengine.ext import db +from google.appengine.api import taskqueue +from google.appengine.api import users +#from google.appengine.ext import webapp +import webapp2 +from google.appengine.ext.webapp import template +#from google.appengine.ext.webapp2 import util +from google.appengine.runtime import DeadlineExceededError + +from ffstorage import * + +from fanficdownloader import adapters, writers, exceptions + +class UserConfigServer(webapp2.RequestHandler): + def getUserConfig(self,user): + config = ConfigParser.SafeConfigParser() + + logging.debug('reading defaults.ini config file') + config.read('defaults.ini') + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l and l[0].config: + uconfig=l[0] + #logging.debug('reading config from UserConfig(%s)'%uconfig.config) + config.readfp(StringIO(uconfig.config)) + + return config + +class MainHandler(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if user: + error = self.request.get('error') + template_values = {'nickname' : user.nickname(), 'authorized': True} + url = self.request.get('url') + template_values['url'] = url + + if error: + if error == 'login_required': + template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' 
+ elif error == 'bad_url': + template_values['error_message'] = 'Unsupported URL: ' + url + elif error == 'custom': + template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') + elif error == 'configsaved': + template_values['error_message'] = 'Configuration Saved' + elif error == 'recentcleared': + template_values['error_message'] = 'Your Recent Downloads List has been Cleared' + + filename = self.request.get('file') + if len(filename) > 1: + template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) + + self.response.headers['Content-Type'] = 'text/html' + path = os.path.join(os.path.dirname(__file__), 'index.html') + + self.response.out.write(template.render(path, template_values)) + else: + logging.debug(users.create_login_url('/')) + url = users.create_login_url(self.request.uri) + template_values = {'login_url' : url, 'authorized': False} + path = os.path.join(os.path.dirname(__file__), 'index.html') + self.response.out.write(template.render(path, template_values)) + + +class EditConfigServer(UserConfigServer): + def get(self): + self.post() + + def post(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + template_values = {'nickname' : user.nickname(), 'authorized': True} + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l: + uconfig=l[0] + else: + uconfig=None + + if self.request.get('update'): + if uconfig is None: + uconfig = UserConfig() + uconfig.user = user + uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case. + uconfig.put() + try: + config = self.getUserConfig(user) + self.redirect("/?error=configsaved") + except Exception, e: + logging.info("Saved Config Failed:%s"%e) + self.redirect("/?error=custom&errtext=%s"%urlEscape(str(e))) + else: # not update, assume display for edit + if uconfig is not None and uconfig.config: + config = uconfig.config + else: + configfile = open("example.ini","rb") + config = configfile.read() + configfile.close() + template_values['config'] = config + + configfile = open("defaults.ini","rb") + config = configfile.read() + configfile.close() + template_values['defaultsini'] = config + + path = os.path.join(os.path.dirname(__file__), 'editconfig.html') + self.response.headers['Content-Type'] = 'text/html' + self.response.out.write(template.render(path, template_values)) + + +class FileServer(webapp2.RequestHandler): + + def get(self): + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + return + + try: + download = getDownloadMeta(id=fileId) + + name = download.name.encode('utf-8') + + logging.info("Serving file: %s" % name) + + if name.endswith('.epub'): + self.response.headers['Content-Type'] = 'application/epub+zip' + elif name.endswith('.html'): + self.response.headers['Content-Type'] = 'text/html' + elif name.endswith('.txt'): + self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.mobi'): + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name + + data = DownloadData.all().filter("download =", download).order("index") + # epubs are all already compressed. 
+ # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if download.format != 'epub': + def dc(data): + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compessing. + except zlib.error: + return data + else: + def dc(data): + return data + + for datum in data: + self.response.out.write(dc(datum.blob)) + + except Exception, e: + fic = DownloadMeta() + fic.failure = unicode(e) + + template_values = dict(fic = fic, + #nickname = user.nickname(), + #escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class FileStatusServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + escaped_url=False + + try: + download = getDownloadMeta(id=fileId) + + if download: + logging.info("Status url: %s" % download.url) + if download.completed and download.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format) + else: + download = DownloadMeta() + download.failure = "Download not found" + + except Exception, e: + download = DownloadMeta() + download.failure = unicode(e) + + template_values = dict(fic = download, + nickname = user.nickname(), + escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class ClearRecentServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + logging.info("Clearing Recent List for user: "+user.nickname()) + q = DownloadMeta.all() + q.filter('user =', user) + num=0 + while( True ): + results = q.fetch(100) + if results: + for d in results: + d.delete() + for c in d.data_chunks: + c.delete() + num = num + 1 + logging.debug('Delete '+d.url) + else: + break + logging.info('Deleted %d instances download.' 
% num) + self.redirect("/?error=recentcleared") + +class RecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = DownloadMeta.all() + q.filter('user =', user).order('-date') + fics = q.fetch(100) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) + + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + +class AllRecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = SavedMeta.all() + if self.request.get('bydate'): + q.order('-date') + else: + q.order('-count') + + fics = q.fetch(200) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + sendslugs = [] + + for fic in fics: + ficslug = FicSlug(fic) + sendslugs.append(ficslug) + + template_values = dict(fics = sendslugs, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'allrecent.html') + self.response.out.write(template.render(path, template_values)) + +class FicSlug(): + def __init__(self,savedmeta): + self.url = savedmeta.url + self.count = savedmeta.count + for k, v in savedmeta.meta.iteritems(): + setattr(self,k,v) + +class FanfictionDownloader(UserConfigServer): + def get(self): + self.post() + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + format = self.request.get('format') + url = self.request.get('url') + + if not url or url.strip() == "": + self.redirect('/') + return + + logging.info("Queuing Download: %s" % url) + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') == "on" + + # use existing record if available. Fetched/Created before + # the adapter can normalize the URL in case we need to record + # an exception. + download = getDownloadMeta(url=url,user=user,format=format,new=True) + + adapter = None + try: + try: + config = self.getUserConfig(user) + except Exception, e: + self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e))) + return + + adapter = adapters.getAdapter(config,url,format) + logging.info('Created an adaper: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + ## This scrapes the metadata, which will be + ## duplicated in the queue task, but it + ## detects bad URLs, bad login, bad story, etc + ## without waiting for the queue. So I think + ## it's worth the double up. Could maybe save + ## it all in the download object someday. + story = adapter.getStoryMetadataOnly() + + ## Fetch again using normalized story URL. The one + ## fetched/created above, if different, will not be saved. 
+ download = getDownloadMeta(url=story.getMetadata('storyUrl'), + user=user,format=format,new=True) + + download.title = story.getMetadata('title') + download.author = story.getMetadata('author') + download.url = story.getMetadata('storyUrl') + download.put() + + taskqueue.add(url='/fdowntask', + queue_name="download", + params={'id':str(download.key()), + 'format':format, + 'url':download.url, + 'login':login, + 'password':password, + 'user':user.email(), + 'is_adult':is_adult}) + + logging.info("enqueued download key: " + str(download.key())) + + except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: + download.failure = unicode(e) + download.put() + logging.info(unicode(e)) + is_login= ( isinstance(e, exceptions.FailedToLogin) ) + template_values = dict(nickname = user.nickname(), + url = url, + format = format, + site = adapter.getSiteDomain(), + fic = download, + is_login=is_login, + ) + # thewriterscoffeeshop.com can do adult check *and* user required. + if isinstance(e,exceptions.AdultCheckRequired): + template_values['login']=login + template_values['password']=password + + path = os.path.join(os.path.dirname(__file__), 'login.html') + self.response.out.write(template.render(path, template_values)) + return + except (exceptions.InvalidStoryURL,exceptions.UnknownSite,exceptions.StoryDoesNotExist), e: + logging.warn(unicode(e)) + download.failure = unicode(e) + download.put() + except Exception, e: + logging.error("Failure Queuing Download: url:%s" % url) + logging.exception(e) + download.failure = unicode(e) + download.put() + + self.redirect('/status?id='+str(download.key())) + + return + + +class FanfictionDownloaderTask(UserConfigServer): + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + fileId = self.request.get('id') + # User object can't pass, just email address + user = users.User(self.request.get('user')) + format = self.request.get('format') + url = self.request.get('url') + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') + + logging.info("Downloading: " + url + " for user: "+user.nickname()) + logging.info("ID: " + fileId) + + adapter = None + writerClass = None + + # use existing record if available. + # fileId should have record from /fdown. + download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True) + for c in download.data_chunks: + c.delete() + download.put() + + logging.info('Creating adapter...') + + try: + config = self.getUserConfig(user) + adapter = adapters.getAdapter(config,url,format) + + logging.info('Created an adapter: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + # adapter.getStory() is what does all the heavy lifting. + # adapter.getStoryMetadataOnly() only fetches enough to + # get metadata. writer.writeStory() will call + # adapter.getStory(), too. 
+ writer = writers.getWriter(format,config,adapter) + download.name = writer.getOutputFileName() + #logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.url = adapter.getStory().getMetadata('storyUrl') + download.put() + + allmeta = adapter.getStory().getAllMetadata(removeallentities=True,doreplacements=False) + + outbuffer = StringIO() + writer.writeStory(outbuffer) + data = outbuffer.getvalue() + outbuffer.close() + del outbuffer + #del writer.adapter + #del writer.story + del writer + #del adapter.story + del adapter + + # epubs are all already compressed. Each chunk is + # compressed individually to avoid having to hold the + # whole in memory just for the compress/uncompress. + if format != 'epub': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data + + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=c(data[:1000000])).put() + index += 1 + data = data[1000000:] + download.completed=True + download.put() + + smetal = SavedMeta.all().filter('url =', allmeta['storyUrl'] ).fetch(1) + if smetal and smetal[0]: + smeta = smetal[0] + smeta.count += 1 + else: + smeta=SavedMeta() + smeta.count = 1 + + smeta.url = allmeta['storyUrl'] + smeta.title = allmeta['title'] + smeta.author = allmeta['author'] + smeta.meta = allmeta + smeta.date = datetime.datetime.now() + smeta.put() + + logging.info("Download finished OK") + del data + + except Exception, e: + logging.exception(e) + download.failure = unicode(e) + download.put() + return + + return + +def getDownloadMeta(id=None,url=None,user=None,format=None,new=False): + ## try to get download rec from passed id first. then fall back + ## to user/url/format + download = None + if id: + try: + download = db.get(db.Key(id)) + logging.info("DownloadMeta found by ID:"+id) + except: + pass + + if not download and url and user and format: + try: + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is not None and len(q) > 0 ): + logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format)) + download = q[0] + except: + pass + + if new: + # NOT clearing existing chunks here, because this record may + # never be saved. 
+ if not download: + logging.debug("New DownloadMeta") + download = DownloadMeta() + + download.completed=False + download.failure=None + download.date=datetime.datetime.now() + + download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID']) + if user: + download.user = user + if url: + download.url = url + if format: + download.format = format + + return download + +def toPercentDecimal(match): + "Return the %decimal number for the character for url escaping" + s = match.group(1) + return "%%%02x" % ord(s) + +def urlEscape(data): + "Escape text, including unicode, for use in URLs" + p = re.compile(r'([^\w])') + return p.sub(toPercentDecimal, data.encode("utf-8")) + +logging.getLogger().setLevel(logging.DEBUG) +app = webapp2.WSGIApplication([('/', MainHandler), + ('/fdowntask', FanfictionDownloaderTask), + ('/fdown', FanfictionDownloader), + (r'/file.*', FileServer), + ('/status', FileStatusServer), + ('/allrecent', AllRecentFilesServer), + ('/recent', RecentFilesServer), + ('/editconfig', EditConfigServer), + ('/clearrecent', ClearRecentServer), + ], + debug=False) diff --git a/makeplugin.py b/makeplugin.py new file mode 100644 index 00000000..e4abac41 --- /dev/null +++ b/makeplugin.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from glob import glob + +from makezip import createZipFile + +if __name__=="__main__": + filename="FanFictionDownLoader.zip" + exclude=['*.pyc','*~','*.xcf'] + # from top dir. 'w' for overwrite + createZipFile(filename,"w", + ['plugin-defaults.ini','plugin-example.ini','epubmerge.py','fanficdownloader'], + exclude=exclude) + #from calibre-plugin dir. 'a' for append + os.chdir('calibre-plugin') + files=['about.txt','images',] + files.extend(glob('*.py')) + files.extend(glob('plugin-import-name-*.txt')) + createZipFile("../"+filename,"a", + files,exclude=exclude) diff --git a/makezip.py b/makezip.py new file mode 100644 index 00000000..55a10197 --- /dev/null +++ b/makezip.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
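+
+# Zip-building helpers used by makeplugin.py above: createZipFile() writes
+# the named files into the archive, recursing into directories via
+# addFolderToZip() and skipping anything matched by the glob-style excludes.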
+ +import os, zipfile, sys +from glob import glob + +def addFolderToZip(myZipFile,folder,exclude=[]): + folder = folder.encode('ascii') #convert path to ascii for ZipFile Method + excludelist=[] + for ex in exclude: + excludelist.extend(glob(folder+"/"+ex)) + for file in glob(folder+"/*"): + if file in excludelist: + continue + if os.path.isfile(file): + #print file + myZipFile.write(file, file, zipfile.ZIP_DEFLATED) + elif os.path.isdir(file): + addFolderToZip(myZipFile,file,exclude=exclude) + +def createZipFile(filename,mode,files,exclude=[]): + myZipFile = zipfile.ZipFile( filename, mode ) # Open the zip file for writing + excludelist=[] + for ex in exclude: + excludelist.extend(glob(ex)) + for file in files: + if file in excludelist: + continue + file = file.encode('ascii') #convert path to ascii for ZipFile Method + if os.path.isfile(file): + (filepath, filename) = os.path.split(file) + #print file + myZipFile.write( file, filename, zipfile.ZIP_DEFLATED ) + if os.path.isdir(file): + addFolderToZip(myZipFile,file,exclude=exclude) + myZipFile.close() + return (1,filename) + diff --git a/plugin-defaults.ini b/plugin-defaults.ini new file mode 100644 index 00000000..2dedcee0 --- /dev/null +++ b/plugin-defaults.ini @@ -0,0 +1,477 @@ +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +[defaults] + +## [defaults] section applies to all formats and sites but may be +## overridden at several levels + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +## All available titlepage_entries and the label used for them: +## _label:
-      New Feature Added
+      Latest Changes

-      You can now see a list of downloaded fanfics by all
-      users by most popular
-      or most recent.
+      Fix for 'Hide this banner' title issue on Archive of Our
+      Own.
+
+      Changed implementation of fimfiction to use provided API,
+      thanks to Althaine.

       Questions? Check out our
@@ -68,7 +71,7 @@
    If you have any problems with this application, please report
    them in the FanFictionDownLoader Google Group. The
-   Previous Version is also available for you to use if necessary.
+   Previous Version is also available for you to use if necessary.
      {{ error_message }} From 3cb92f48cc0619a12ab70107c8a14a16090c2903 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 22 Apr 2012 10:10:08 -0500 Subject: [PATCH 451/482] Added tag FanFictionDownLoader-4.4.8 for changeset 8fc7a6cc2d87 From 0a2cff3469d37f7a60068b774314d35b3c7b78ef Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 25 Apr 2012 21:52:56 -0500 Subject: [PATCH 452/482] Yet more fixes for poor parsing of numeric entities--this time, my poor parsing. --- calibre-plugin/__init__.py | 2 +- fanficdownloader/adapters/adapter_test1.py | 1 + fanficdownloader/htmlcleanup.py | 18 ++++++++++++++---- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index a59cecfe..bd75cf1c 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 15) + version = (1, 5, 16) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 0496227b..c6557370 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -184,6 +184,7 @@ br breaks

      Puella Magi Madoka Magica/魔法少女まどか★マギカ
      br breaks

+Don't&#8212e;ver&#8212d;o&#8212that&#8212a;gain, &#27861; &#xe9;
      horizontal rules
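Pulled out of diff form, here is the repaired entity parsing that the htmlcleanup.py change below lands: a minimal, self-contained Python 2 sketch built from the patched _unirepl() and _replaceNumberEntities(). Only the demo input line at the bottom is invented, echoing the test data added above.

# -*- coding: utf-8 -*-
import re

def _unirepl(match):
    "Return the unicode character for a decimal or hex numeric entity."
    if match.group(1).startswith('x'):
        radix = 16
        s = match.group(1)[1:]
    else:
        radix = 10
        s = match.group(1)
    try:
        value = int(s, radix)
        retval = u"%s%s" % (unichr(value), match.group(2))
    except:
        # Bad entity: drop it rather than blowing up the whole download.
        retval = u""
    return retval

def _replaceNumberEntities(data):
    # Accepts up-to-5-digit decimal (&#27861;) and up-to-4-digit hex
    # (&#xe9;) forms, plus SGMLParser's mangled "&#8212e;" output, where
    # the stray trailing character lands in group(2).
    p = re.compile(r'&#(x[0-9a-fA-F]{,4}|[0-9]{,5})([0-9a-fA-F]*?);')
    return p.sub(_unirepl, data)

# "&#8212e;ver" comes back as an em dash plus "ver" (restoring "ever");
# the other two become the CJK and accented characters from the test
# line above.
print _replaceNumberEntities(u"Don't&#8212e;ver &#27861; &#xe9;").encode('utf-8')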
      diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py index 2bf42803..4dfb306c 100644 --- a/fanficdownloader/htmlcleanup.py +++ b/fanficdownloader/htmlcleanup.py @@ -19,12 +19,21 @@ import re def _unirepl(match): "Return the unicode string for a decimal number" - if match.group(1)=='x': + if match.group(1).startswith('x'): radix=16 + s = match.group(1)[1:] else: radix=10 - value = int(match.group(2), radix) - return "%s%s"%(unichr(value),match.group(3)) + s = match.group(1) + try: + value = int(s, radix) + retval = "%s%s"%(unichr(value),match.group(2)) + except: + # This way, at least if there's more of entities out there + # that fail, it doesn't blow the entire download. + print "Numeric entity translation failed, skipping: &#x%s%s"%(match.group(1),match.group(2)) + retval = "" + return retval def _replaceNumberEntities(data): # The same brokenish entity parsing in SGMLParser that inserts ';' @@ -33,7 +42,8 @@ def _replaceNumberEntities(data): # "Don't—ever—do—that—again," becomes # "Don't—e;ver—d;o—that—a;gain," # Also need to allow for 5 digit decimal entities 法 - p = re.compile(r'&#(x?)([0-9]{,5}|[0-9a-fA-F]{,4})([0-9a-fA-F]*?);') + # Last expression didn't allow for 2 digit hex correctly: é + p = re.compile(r'&#(x[0-9a-fA-F]{,4}|[0-9]{,5})([0-9a-fA-F]*?);') return p.sub(_unirepl, data) def _replaceNotEntities(data): From ea43dffe0cfcef3589dfe6301d2499300ab7f5cb Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 25 Apr 2012 21:53:09 -0500 Subject: [PATCH 453/482] Added tag calibre-plugin-1.5.16 for changeset d38c36148dc4 From 3719e9860cbc79525393d2ceda6c9562f168f77b Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Wed, 25 Apr 2012 21:53:28 -0500 Subject: [PATCH 454/482] Added tag CLI-4.4.8a for changeset d38c36148dc4 From f0a1e1d4713604f597fd1e852281885ce2851666 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 8 May 2012 10:43:02 -0500 Subject: [PATCH 455/482] Plugin--Add feature to integrate with Generate Cover plugin by kiwidude. --- calibre-plugin/__init__.py | 2 +- calibre-plugin/about.txt | 2 +- calibre-plugin/config.py | 122 ++++++++++++++++++++++---- calibre-plugin/ffdl_plugin.py | 46 +++++++++- fanficdownloader/adapters/__init__.py | 3 + plugin-defaults.ini | 14 +++ 6 files changed, 166 insertions(+), 23 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index bd75cf1c..73d517f5 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 16) + version = (1, 5, 18) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/about.txt b/calibre-plugin/about.txt index 9174f8b1..9ea9cd05 100644 --- a/calibre-plugin/about.txt +++ b/calibre-plugin/about.txt @@ -1,6 +1,6 @@
-Created by Jim Miller, borrowing heavily from Grant Drake's
+
      Plugin created by Jim Miller, borrowing heavily from Grant Drake's 'Reading List', 'Extract ISBN' and 'Count Pages' diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 5d33643b..35d05a0f 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -9,16 +9,17 @@ __docformat__ = 'restructuredtext en' import traceback, copy -from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QFont, - QTextEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QVariant) +from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QFont, QWidget, + QTextEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QVariant, QScrollArea) from calibre.gui2 import dynamic, info_dialog from calibre.utils.config import JSONConfig from calibre.gui2.ui import get_gui from calibre_plugins.fanfictiondownloader_plugin.dialogs \ - import (SKIP, ADDNEW, UPDATE, UPDATEALWAYS, OVERWRITE, OVERWRITEALWAYS, - CALIBREONLY,collision_order) + import (UPDATE, UPDATEALWAYS, OVERWRITE, collision_order) + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters import getSiteDomains from calibre_plugins.fanfictiondownloader_plugin.common_utils \ import ( get_library_uuid, KeyboardConfigDialog ) @@ -53,6 +54,9 @@ all_prefs.defaults['addtolists'] = False all_prefs.defaults['addtoreadlists'] = False all_prefs.defaults['addtolistsonread'] = False +all_prefs.defaults['gc_site_settings'] = {} +all_prefs.defaults['allow_gc_from_ini'] = True + all_prefs.defaults['custom_cols'] = {} # The list of settings to copy from all_prefs or the previous library @@ -67,7 +71,9 @@ copylist = ['personal.ini', 'collision', 'deleteotherforms', 'adddialogstaysontop', - 'includeimages'] + 'includeimages', + 'gc_site_settings', + 'allow_gc_from_ini'] # fake out so I don't have to change the prefs calls anywhere. 
The # Java programmer in me is offended by op-overloading, but it's very @@ -87,7 +93,8 @@ class PrefsFacade(): self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs)) else: self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs[self.lastlibid])) - self.lastlibid = libraryid + + self.lastlibid = libraryid return self.all_prefs[libraryid] @@ -139,10 +146,15 @@ class ConfigWidget(QWidget): self.personalini_tab = PersonalIniTab(self, plugin_action) tab_widget.addTab(self.personalini_tab, 'personal.ini') - self.list_tab = ListTab(self, plugin_action) - tab_widget.addTab(self.list_tab, 'Reading Lists') + self.readinglist_tab = ReadingListTab(self, plugin_action) + tab_widget.addTab(self.readinglist_tab, 'Reading Lists') if 'Reading List' not in plugin_action.gui.iactions: - self.list_tab.setEnabled(False) + self.readinglist_tab.setEnabled(False) + + self.generatecover_tab = GenerateCoverTab(self, plugin_action) + tab_widget.addTab(self.generatecover_tab, 'Generate Cover') + if 'Generate Cover' not in plugin_action.gui.iactions: + self.generatecover_tab.setEnabled(False) self.columns_tab = ColumnsTab(self, plugin_action) tab_widget.addTab(self.columns_tab, 'Custom Columns') @@ -165,15 +177,15 @@ class ConfigWidget(QWidget): prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked() prefs['includeimages'] = self.basic_tab.includeimages.isChecked() - if self.list_tab: + if self.readinglist_tab: # lists - prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.list_tab.send_lists_box.text()).split(',')))) - prefs['read_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.list_tab.read_lists_box.text()).split(',')))) + prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.send_lists_box.text()).split(',')))) + prefs['read_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.read_lists_box.text()).split(',')))) # print("send_lists: %s"%prefs['send_lists']) # print("read_lists: %s"%prefs['read_lists']) - prefs['addtolists'] = self.list_tab.addtolists.isChecked() - prefs['addtoreadlists'] = self.list_tab.addtoreadlists.isChecked() - prefs['addtolistsonread'] = self.list_tab.addtolistsonread.isChecked() + prefs['addtolists'] = self.readinglist_tab.addtolists.isChecked() + prefs['addtoreadlists'] = self.readinglist_tab.addtoreadlists.isChecked() + prefs['addtolistsonread'] = self.readinglist_tab.addtolistsonread.isChecked() # personal.ini ini = unicode(self.personalini_tab.ini.toPlainText()) @@ -183,6 +195,16 @@ class ConfigWidget(QWidget): # if they've removed everything, reset to default. 
prefs['personal.ini'] = get_resources('plugin-example.ini') + # Generate Covers tab + gc_site_settings = {} + for (site,combo) in self.generatecover_tab.gc_dropdowns.iteritems(): + val = unicode(combo.itemData(combo.currentIndex()).toString()) + if val != 'none': + gc_site_settings[site] = val + #print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site])) + prefs['gc_site_settings'] = gc_site_settings + prefs['allow_gc_from_ini'] = self.generatecover_tab.allow_gc_from_ini.isChecked() + # Custom Columns tab colsmap = {} for (col,combo) in self.columns_tab.custcol_dropdowns.iteritems(): @@ -191,7 +213,7 @@ class ConfigWidget(QWidget): colsmap[col] = val #print("colsmap[%s]:%s"%(col,colsmap[col])) prefs['custom_cols'] = colsmap - + def edit_shortcuts(self): self.save_settings() # Force the menus to be rebuilt immediately, so we have all our actions registered @@ -286,7 +308,7 @@ class BasicTab(QWidget): self.includeimages.setToolTip("Download and include images in EPUB stories. This is equivalent to adding:\n\n[epub]\ninclude_images:true\nkeep_summary_html:true\nmake_firstimage_cover:true\n\n ...to the top of personal.ini. Your settings in personal.ini will override this.") self.includeimages.setChecked(prefs['includeimages']) self.l.addWidget(self.includeimages) - + self.l.insertStretch(-1) def set_collisions(self): @@ -372,7 +394,7 @@ class ShowDefaultsIniDialog(QDialog): self.ok_button.clicked.connect(self.hide) self.l.addWidget(self.ok_button) -class ListTab(QWidget): +class ReadingListTab(QWidget): def __init__(self, parent_dialog, plugin_action): self.parent_dialog = parent_dialog @@ -432,6 +454,70 @@ class ListTab(QWidget): self.l.insertStretch(-1) +class GenerateCoverTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + gc_plugin = plugin_action.gui.iactions['Generate Cover'] + gc_settings = gc_plugin.get_saved_setting_names() + except KeyError: + gc_settings= [] + + label = QLabel('The Generate Cover plugin can create cover images for books using various metadata and configurations. If you have GC installed, FFDL can run GC on new downloads and metadata updates. Pick a GC setting by site or Default.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + scrollable = QScrollArea() + scrollcontent = QWidget() + scrollable.setWidget(scrollcontent) + scrollable.setWidgetResizable(True) + self.l.addWidget(scrollable) + + self.sl = QVBoxLayout() + scrollcontent.setLayout(self.sl) + + self.gc_dropdowns = {} + + sitelist = getSiteDomains() + sitelist.sort() + sitelist.insert(0,u"Default") + for site in sitelist: + horz = QHBoxLayout() + label = QLabel(site) + if site == u"Default": + s = "On Metadata update, run Generate Cover with this setting, if not selected for specific site." 
+ else: + s = "On Metadata update, run Generate Cover with this setting for site (%s)."%site + + label.setToolTip(s) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.setToolTip(s) + dropdown.addItem('',QVariant('none')) + for setting in gc_settings: + dropdown.addItem(setting,QVariant(setting)) + self.gc_dropdowns[site] = dropdown + if site in prefs['gc_site_settings']: + dropdown.setCurrentIndex(dropdown.findData(QVariant(prefs['gc_site_settings'][site]))) + + horz.addWidget(dropdown) + self.sl.addLayout(horz) + + self.allow_gc_from_ini = QCheckBox('Allow generate_cover_settings from personal.ini to override.',self) + self.allow_gc_from_ini.setToolTip("The INI parameter generate_cover_settings allows you to choose a GC setting based on metadata rather than site,\nbut it's much more complex. generate_cover_settings is ignored when this is off.") + self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini']) + self.l.addWidget(self.allow_gc_from_ini) + + self.l.insertStretch(-1) + class OtherTab(QWidget): def __init__(self, parent_dialog, plugin_action): diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 1c73fc3a..a2d2e811 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -7,11 +7,12 @@ __license__ = 'GPL v3' __copyright__ = '2012, Jim Miller' __docformat__ = 'restructuredtext en' -import time, os, copy, threading +import time, os, copy, threading, re from ConfigParser import SafeConfigParser from StringIO import StringIO from functools import partial from datetime import datetime +from string import Template from PyQt4.Qt import (QApplication, QMenu, QToolButton) @@ -772,7 +773,6 @@ make_firstimage_cover:true epubmi = get_metadata(existingepub,'EPUB') if epubmi.cover_data[1] is not None: db.set_cover(book_id, epubmi.cover_data[1]) - #mi.cover = epubmi.cover_data[1] # set author link if found. All current adapters have authorUrl. if 'authorUrl' in book['all_metadata']: @@ -812,8 +812,48 @@ make_firstimage_cover:true if meta == 'status-I': val = book['all_metadata']['status'] == 'In-Progress' db.set_custom(book_id, val, label=label, commit=False) - + db.commit() + + if 'Generate Cover' in self.gui.iactions: + + gc_plugin = self.gui.iactions['Generate Cover'] + setting_name = None + if prefs['allow_gc_from_ini']: + ffdlconfig = SafeConfigParser() + ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) + ffdlconfig.readfp(StringIO(prefs['personal.ini'])) + adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform']) + + # template => regexp to match => GC Setting to use. + # generate_cover_settings: + # ${category} => Buffy:? the Vampire Slayer => Buffy + for line in adapter.getConfig('generate_cover_settings').splitlines(): + if "=>" in line: + (template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") ) + value = Template(template).substitute(book['all_metadata']).encode('utf8') + print("%s(%s) => %s => %s"%(template,value,regexp,setting)) + if re.search(regexp,value): + setting_name = setting + break + + if setting_name: + print("Generate Cover Setting from generate_cover_settings(%s)"%line) + if setting_name not in gc_plugin.get_saved_setting_names(): + print("GC Name %s not found, discarding! 
(check personal.ini for typos)"%setting_name) + setting_name = None + + if not setting_name and book['all_metadata']['site'] in prefs['gc_site_settings']: + setting_name = prefs['gc_site_settings'][book['all_metadata']['site']] + + if not setting_name and 'Default' in prefs['gc_site_settings']: + setting_name = prefs['gc_site_settings']['Default'] + + if setting_name: + print("Running Generate Cover with settings %s."%setting_name) + realmi = db.get_metadata(book_id, index_is_id=True) + gc_plugin.generate_cover_for_book(realmi,saved_setting_name=setting_name) + def _get_clean_reading_lists(self,lists): if lists == None or lists.strip() == "" : diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index e8eeb85d..51c9a0c6 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -100,6 +100,9 @@ def getAdapter(config,url,fileform=None): # No adapter found. raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] ) +def getSiteDomains(): + return [cls.getSiteDomain() for cls in __class_list] + def getClassFor(domain): for cls in __class_list: if cls.matchesSite(domain): diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 2dedcee0..ae955c27 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -127,6 +127,20 @@ extratags: FanFiction ## doesn't work on some devices either.) #replace_hr: false +## If you have the Generate Cover plugin installed, you can use the +## generate_cover_settings parameter to intelligently decide which GC +## setting to run. There are three parts 1) a template of which +## metadata part(s) to look at, 2) a regular expression to match the +## template, and 3) the name of the GC setting to use, which must +## match exactly. Use this parameter in [defaults], or by site eg, +## [www.ficwad.com] +## Make sure to keep at least one space at the start of each line and +## to escape % to %%, if used. +## template => regexp to match => GC Setting to use. +#generate_cover_settings: +# ${category} => Buffy:? [tT]he Vampire Slayer => BuffyCover +# ${category} => Star Trek => StarTrekCover + ## Each output format has a section that overrides [defaults] [html] From 3ccb895c7bc0ce1173d433905e699e1fc82dff5e Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 8 May 2012 10:43:26 -0500 Subject: [PATCH 456/482] Added tag calibre-plugin-1.5.18 for changeset 3beaa4617bd3 From 3d735e430d0bb8c2699bfca6795185fe5fd1a3c4 Mon Sep 17 00:00:00 2001 From: Ida Date: Thu, 10 May 2012 17:38:12 -0400 Subject: [PATCH 457/482] First version of Libraryofmoria.com and Wraithbait.com adapters --- .../adapters/adapter_libraryofmoriacom.py | 250 ++++++++++++++++++ .../adapters/adapter_wraithbaitcom.py | 229 ++++++++++++++++ 2 files changed, 479 insertions(+) create mode 100644 fanficdownloader/adapters/adapter_libraryofmoriacom.py create mode 100644 fanficdownloader/adapters/adapter_wraithbaitcom.py diff --git a/fanficdownloader/adapters/adapter_libraryofmoriacom.py b/fanficdownloader/adapters/adapter_libraryofmoriacom.py new file mode 100644 index 00000000..04c9bf02 --- /dev/null +++ b/fanficdownloader/adapters/adapter_libraryofmoriacom.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + + +def getClass(): + return LibraryOfMoriaComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class LibraryOfMoriaComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/a/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','lom') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Lord of the Rings") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.libraryofmoria.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/a/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/a/viewstory.php?sid=")+r"\d+$" + + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + addurl = "&ageconsent=ok&warning=3" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid & error in url. 
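+ # (i.e. decode the HTML-escaped "&amp;" back to a bare "&")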
+ addurl = addurl.replace("&","&") + url = self.url+'&index=1'+addurl + logging.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/a/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
      etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Type' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warning' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=5')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/a/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_wraithbaitcom.py b/fanficdownloader/adapters/adapter_wraithbaitcom.py new file mode 100644 index 00000000..9e4aa57a --- /dev/null +++ b/fanficdownloader/adapters/adapter_wraithbaitcom.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + + +def getClass(): + return WraithBaitComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class WraithBaitComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wb') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Stargate") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d %b %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.wraithbait.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=12" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "for adults only" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. 
This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the metadata and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + self.story.setMetadata('rating', a.nextSibling) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their metadata + # formatting, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + info = soup.find('div', {'class' : 'small'}) + + word=info.find(text=re.compile("Word count:")).split(':') + self.story.setMetadata('numWords', word[1]) + + cats = info.findAll('a',href=re.compile(r'browse.php\?type=categories&id=\d')) + for cat in cats: + if "General" != cat.string: + self.story.addToList('category',cat.string) + + chars = info.findAll('a',href=re.compile(r'browse.php\?type=characters&charid=\d')) + for char in chars: + self.story.addToList('characters',char.string) + + completed=info.find(text=re.compile("Completed: Yes")) + if completed != None: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + + # Rated: NC-17
      etc + labels = soup.findAll('span',{'class':'label'}) + pub=0 + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Genres' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Published' in label and pub ==0: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + pub=1 + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + info.extract() + summary = soup.find('div', {'class' : 'content'}) + self.setDescription(url,summary) + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) + From 5ceb96c91343b0d78add59a2c1271ba384d19169 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 12 May 2012 15:58:51 -0500 Subject: [PATCH 458/482] Adding WWOMB(www.squidge.org/peja) adapter. Some infrastructure changes to allow config section [www.squidge.org/peja]. More needed to support other sites under www.squidge.org. 
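As an illustrative sketch of what the new section enables (the option names
below are existing FFDL ini settings, shown only as an example; they are not
text from this commit), personal.ini could then carry per-archive settings
such as:

[www.squidge.org/peja]
is_adult:true
include_images:false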
--- allrecent.html | 78 + app.yaml | 46 + calibre-plugin/__init__.py | 90 + calibre-plugin/about.txt | 28 + calibre-plugin/common_utils.py | 447 ++ calibre-plugin/config.py | 675 +++ calibre-plugin/dialogs.py | 663 +++ calibre-plugin/ffdl_plugin.py | 1047 ++++ calibre-plugin/images/icon.png | Bin 0 -> 24649 bytes calibre-plugin/images/icon.xcf | Bin 0 -> 63927 bytes calibre-plugin/jobs.py | 163 + ...mport-name-fanfictiondownloader_plugin.txt | 0 cron.yaml | 10 + css/index.css | 73 + defaults.ini | 519 ++ delete_fic.py | 59 + downloader.py | 202 + editconfig.html | 89 + epubmerge.py | 25 + example.ini | 40 + fanficdownloader/BeautifulSoup.py | 2014 ++++++++ fanficdownloader/__init__.py | 1 + fanficdownloader/adapters/__init__.py | 110 + .../adapters/adapter_adastrafanficcom.py | 228 + .../adapters/adapter_archiveofourownorg.py | 263 + .../adapters/adapter_archiveskyehawkecom.py | 190 + .../adapters/adapter_castlefansorg.py | 310 ++ .../adapters/adapter_fanfictionnet.py | 278 ++ .../adapters/adapter_ficbooknet.py | 222 + .../adapters/adapter_fictionalleyorg.py | 231 + .../adapters/adapter_fictionpresscom.py | 49 + .../adapters/adapter_ficwadcom.py | 217 + .../adapters/adapter_fimfictionnet.py | 168 + .../adapter_harrypotterfanfictioncom.py | 201 + .../adapters/adapter_hpfandomnet.py | 234 + .../adapters/adapter_ksarchivecom.py | 306 ++ .../adapters/adapter_mediaminerorg.py | 235 + .../adapters/adapter_midnightwhispersca.py | 289 ++ .../adapters/adapter_mugglenetcom.py | 331 ++ .../adapters/adapter_nfacommunitycom.py | 289 ++ .../adapters/adapter_portkeyorg.py | 278 ++ .../adapters/adapter_potionsandsnitchesnet.py | 209 + fanficdownloader/adapters/adapter_siyecouk.py | 238 + .../adapters/adapter_squidgeorgpeja.py | 234 + .../adapters/adapter_tenhawkpresentscom.py | 246 + fanficdownloader/adapters/adapter_test1.py | 200 + .../adapters/adapter_thequidditchpitchorg.py | 293 ++ .../adapter_thewriterscoffeeshopcom.py | 252 + .../adapters/adapter_tthfanficorg.py | 258 + .../adapters/adapter_twilightednet.py | 250 + .../adapters/adapter_twiwritenet.py | 276 ++ .../adapters/adapter_whoficcom.py | 232 + fanficdownloader/adapters/base_adapter.py | 369 ++ fanficdownloader/chardet/__init__.py | 26 + fanficdownloader/chardet/big5freq.py | 923 ++++ fanficdownloader/chardet/big5prober.py | 41 + fanficdownloader/chardet/chardistribution.py | 200 + .../chardet/charsetgroupprober.py | 96 + fanficdownloader/chardet/charsetprober.py | 60 + .../chardet/codingstatemachine.py | 56 + fanficdownloader/chardet/constants.py | 47 + fanficdownloader/chardet/escprober.py | 79 + fanficdownloader/chardet/escsm.py | 240 + fanficdownloader/chardet/eucjpprober.py | 85 + fanficdownloader/chardet/euckrfreq.py | 594 +++ fanficdownloader/chardet/euckrprober.py | 41 + fanficdownloader/chardet/euctwfreq.py | 426 ++ fanficdownloader/chardet/euctwprober.py | 41 + fanficdownloader/chardet/gb2312freq.py | 471 ++ fanficdownloader/chardet/gb2312prober.py | 41 + fanficdownloader/chardet/hebrewprober.py | 269 + fanficdownloader/chardet/jisfreq.py | 567 +++ fanficdownloader/chardet/jpcntx.py | 210 + .../chardet/langbulgarianmodel.py | 228 + fanficdownloader/chardet/langcyrillicmodel.py | 329 ++ fanficdownloader/chardet/langgreekmodel.py | 225 + fanficdownloader/chardet/langhebrewmodel.py | 201 + .../chardet/langhungarianmodel.py | 225 + fanficdownloader/chardet/langthaimodel.py | 200 + fanficdownloader/chardet/latin1prober.py | 136 + fanficdownloader/chardet/mbcharsetprober.py | 82 + fanficdownloader/chardet/mbcsgroupprober.py | 50 + 
fanficdownloader/chardet/mbcssm.py | 514 ++ fanficdownloader/chardet/sbcharsetprober.py | 106 + fanficdownloader/chardet/sbcsgroupprober.py | 64 + fanficdownloader/chardet/sjisprober.py | 85 + fanficdownloader/chardet/test.py | 20 + fanficdownloader/chardet/universaldetector.py | 154 + fanficdownloader/chardet/utf8prober.py | 76 + fanficdownloader/configurable.py | 77 + fanficdownloader/epubutils.py | 96 + fanficdownloader/exceptions.py | 69 + fanficdownloader/gziphttp.py | 38 + fanficdownloader/html.py | 126 + fanficdownloader/html2text.py | 452 ++ fanficdownloader/htmlcleanup.py | 478 ++ fanficdownloader/mobi.py | 384 ++ fanficdownloader/story.py | 406 ++ fanficdownloader/translit.py | 57 + fanficdownloader/writers/__init__.py | 38 + fanficdownloader/writers/base_writer.py | 277 ++ fanficdownloader/writers/writer_epub.py | 478 ++ fanficdownloader/writers/writer_html.py | 103 + fanficdownloader/writers/writer_mobi.py | 202 + fanficdownloader/writers/writer_txt.py | 157 + ffstorage.py | 63 + index-ajax.html | 109 + index.html | 359 ++ index.yaml | 28 + js/fdownloader.js | 116 + js/jquery-1.3.2.js | 4376 +++++++++++++++++ login.html | 110 + main.py | 621 +++ makeplugin.py | 38 + makezip.py | 54 + plugin-defaults.ini | 502 ++ plugin-example.ini | 97 + queue.yaml | 7 + readme.txt | 19 + recent.html | 85 + settings.py | 25 + simplejson/__init__.py | 318 ++ simplejson/_speedups.c | 2329 +++++++++ simplejson/decoder.py | 354 ++ simplejson/encoder.py | 440 ++ simplejson/scanner.py | 65 + simplejson/tests/__init__.py | 23 + simplejson/tests/test_check_circular.py | 30 + simplejson/tests/test_decode.py | 22 + simplejson/tests/test_default.py | 9 + simplejson/tests/test_dump.py | 21 + .../tests/test_encode_basestring_ascii.py | 38 + simplejson/tests/test_fail.py | 76 + simplejson/tests/test_float.py | 15 + simplejson/tests/test_indent.py | 41 + simplejson/tests/test_pass1.py | 76 + simplejson/tests/test_pass2.py | 14 + simplejson/tests/test_pass3.py | 20 + simplejson/tests/test_recursion.py | 67 + simplejson/tests/test_scanstring.py | 111 + simplejson/tests/test_separators.py | 42 + simplejson/tests/test_unicode.py | 64 + simplejson/tool.py | 37 + static/ajax-loader.gif | Bin 0 -> 10819 bytes static/favicon.ico | Bin 0 -> 21792 bytes status.html | 94 + utils/__init__.py | 1 + utils/remover.py | 109 + utils/tally.py | 64 + 149 files changed, 35490 insertions(+) create mode 100644 allrecent.html create mode 100644 app.yaml create mode 100644 calibre-plugin/__init__.py create mode 100644 calibre-plugin/about.txt create mode 100644 calibre-plugin/common_utils.py create mode 100644 calibre-plugin/config.py create mode 100644 calibre-plugin/dialogs.py create mode 100644 calibre-plugin/ffdl_plugin.py create mode 100644 calibre-plugin/images/icon.png create mode 100644 calibre-plugin/images/icon.xcf create mode 100644 calibre-plugin/jobs.py create mode 100644 calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt create mode 100644 cron.yaml create mode 100644 css/index.css create mode 100644 defaults.ini create mode 100644 delete_fic.py create mode 100644 downloader.py create mode 100644 editconfig.html create mode 100644 epubmerge.py create mode 100644 example.ini create mode 100644 fanficdownloader/BeautifulSoup.py create mode 100644 fanficdownloader/__init__.py create mode 100644 fanficdownloader/adapters/__init__.py create mode 100644 fanficdownloader/adapters/adapter_adastrafanficcom.py create mode 100644 fanficdownloader/adapters/adapter_archiveofourownorg.py create mode 100644 
fanficdownloader/adapters/adapter_archiveskyehawkecom.py create mode 100644 fanficdownloader/adapters/adapter_castlefansorg.py create mode 100644 fanficdownloader/adapters/adapter_fanfictionnet.py create mode 100644 fanficdownloader/adapters/adapter_ficbooknet.py create mode 100644 fanficdownloader/adapters/adapter_fictionalleyorg.py create mode 100644 fanficdownloader/adapters/adapter_fictionpresscom.py create mode 100644 fanficdownloader/adapters/adapter_ficwadcom.py create mode 100644 fanficdownloader/adapters/adapter_fimfictionnet.py create mode 100644 fanficdownloader/adapters/adapter_harrypotterfanfictioncom.py create mode 100644 fanficdownloader/adapters/adapter_hpfandomnet.py create mode 100644 fanficdownloader/adapters/adapter_ksarchivecom.py create mode 100644 fanficdownloader/adapters/adapter_mediaminerorg.py create mode 100644 fanficdownloader/adapters/adapter_midnightwhispersca.py create mode 100644 fanficdownloader/adapters/adapter_mugglenetcom.py create mode 100644 fanficdownloader/adapters/adapter_nfacommunitycom.py create mode 100644 fanficdownloader/adapters/adapter_portkeyorg.py create mode 100644 fanficdownloader/adapters/adapter_potionsandsnitchesnet.py create mode 100644 fanficdownloader/adapters/adapter_siyecouk.py create mode 100644 fanficdownloader/adapters/adapter_squidgeorgpeja.py create mode 100644 fanficdownloader/adapters/adapter_tenhawkpresentscom.py create mode 100644 fanficdownloader/adapters/adapter_test1.py create mode 100644 fanficdownloader/adapters/adapter_thequidditchpitchorg.py create mode 100644 fanficdownloader/adapters/adapter_thewriterscoffeeshopcom.py create mode 100644 fanficdownloader/adapters/adapter_tthfanficorg.py create mode 100644 fanficdownloader/adapters/adapter_twilightednet.py create mode 100644 fanficdownloader/adapters/adapter_twiwritenet.py create mode 100644 fanficdownloader/adapters/adapter_whoficcom.py create mode 100644 fanficdownloader/adapters/base_adapter.py create mode 100644 fanficdownloader/chardet/__init__.py create mode 100644 fanficdownloader/chardet/big5freq.py create mode 100644 fanficdownloader/chardet/big5prober.py create mode 100644 fanficdownloader/chardet/chardistribution.py create mode 100644 fanficdownloader/chardet/charsetgroupprober.py create mode 100644 fanficdownloader/chardet/charsetprober.py create mode 100644 fanficdownloader/chardet/codingstatemachine.py create mode 100644 fanficdownloader/chardet/constants.py create mode 100644 fanficdownloader/chardet/escprober.py create mode 100644 fanficdownloader/chardet/escsm.py create mode 100644 fanficdownloader/chardet/eucjpprober.py create mode 100644 fanficdownloader/chardet/euckrfreq.py create mode 100644 fanficdownloader/chardet/euckrprober.py create mode 100644 fanficdownloader/chardet/euctwfreq.py create mode 100644 fanficdownloader/chardet/euctwprober.py create mode 100644 fanficdownloader/chardet/gb2312freq.py create mode 100644 fanficdownloader/chardet/gb2312prober.py create mode 100644 fanficdownloader/chardet/hebrewprober.py create mode 100644 fanficdownloader/chardet/jisfreq.py create mode 100644 fanficdownloader/chardet/jpcntx.py create mode 100644 fanficdownloader/chardet/langbulgarianmodel.py create mode 100644 fanficdownloader/chardet/langcyrillicmodel.py create mode 100644 fanficdownloader/chardet/langgreekmodel.py create mode 100644 fanficdownloader/chardet/langhebrewmodel.py create mode 100644 fanficdownloader/chardet/langhungarianmodel.py create mode 100644 fanficdownloader/chardet/langthaimodel.py create mode 100644 
fanficdownloader/chardet/latin1prober.py create mode 100644 fanficdownloader/chardet/mbcharsetprober.py create mode 100644 fanficdownloader/chardet/mbcsgroupprober.py create mode 100644 fanficdownloader/chardet/mbcssm.py create mode 100644 fanficdownloader/chardet/sbcharsetprober.py create mode 100644 fanficdownloader/chardet/sbcsgroupprober.py create mode 100644 fanficdownloader/chardet/sjisprober.py create mode 100644 fanficdownloader/chardet/test.py create mode 100644 fanficdownloader/chardet/universaldetector.py create mode 100644 fanficdownloader/chardet/utf8prober.py create mode 100644 fanficdownloader/configurable.py create mode 100644 fanficdownloader/epubutils.py create mode 100644 fanficdownloader/exceptions.py create mode 100644 fanficdownloader/gziphttp.py create mode 100644 fanficdownloader/html.py create mode 100644 fanficdownloader/html2text.py create mode 100644 fanficdownloader/htmlcleanup.py create mode 100644 fanficdownloader/mobi.py create mode 100644 fanficdownloader/story.py create mode 100644 fanficdownloader/translit.py create mode 100644 fanficdownloader/writers/__init__.py create mode 100644 fanficdownloader/writers/base_writer.py create mode 100644 fanficdownloader/writers/writer_epub.py create mode 100644 fanficdownloader/writers/writer_html.py create mode 100644 fanficdownloader/writers/writer_mobi.py create mode 100644 fanficdownloader/writers/writer_txt.py create mode 100644 ffstorage.py create mode 100644 index-ajax.html create mode 100644 index.html create mode 100644 index.yaml create mode 100644 js/fdownloader.js create mode 100644 js/jquery-1.3.2.js create mode 100644 login.html create mode 100644 main.py create mode 100644 makeplugin.py create mode 100644 makezip.py create mode 100644 plugin-defaults.ini create mode 100644 plugin-example.ini create mode 100644 queue.yaml create mode 100644 readme.txt create mode 100644 recent.html create mode 100644 settings.py create mode 100644 simplejson/__init__.py create mode 100644 simplejson/_speedups.c create mode 100644 simplejson/decoder.py create mode 100644 simplejson/encoder.py create mode 100644 simplejson/scanner.py create mode 100644 simplejson/tests/__init__.py create mode 100644 simplejson/tests/test_check_circular.py create mode 100644 simplejson/tests/test_decode.py create mode 100644 simplejson/tests/test_default.py create mode 100644 simplejson/tests/test_dump.py create mode 100644 simplejson/tests/test_encode_basestring_ascii.py create mode 100644 simplejson/tests/test_fail.py create mode 100644 simplejson/tests/test_float.py create mode 100644 simplejson/tests/test_indent.py create mode 100644 simplejson/tests/test_pass1.py create mode 100644 simplejson/tests/test_pass2.py create mode 100644 simplejson/tests/test_pass3.py create mode 100644 simplejson/tests/test_recursion.py create mode 100644 simplejson/tests/test_scanstring.py create mode 100644 simplejson/tests/test_separators.py create mode 100644 simplejson/tests/test_unicode.py create mode 100644 simplejson/tool.py create mode 100644 static/ajax-loader.gif create mode 100644 static/favicon.ico create mode 100644 status.html create mode 100644 utils/__init__.py create mode 100644 utils/remover.py create mode 100644 utils/tally.py diff --git a/allrecent.html b/allrecent.html new file mode 100644 index 00000000..477b17b7 --- /dev/null +++ b/allrecent.html @@ -0,0 +1,78 @@ + + + + + FanFictionDownLoader (fanfiction.net, fanficauthors, fictionalley, ficwad to epub and HTML) + + + + +

      +

      + FanFictionDownLoader +

      + + + + + {{yourfile}} + + +
      + {% for fic in fics %} +

      + {{ fic.title }} + by {{ fic.author }} Download Count: {{ fic.count }}
      + Word Count: {{ fic.numWords }} Chapter Count: {{ fic.numChapters }}
      + {% if fic.category %} Categories: {{ fic.category }}
      {% endif %} + {% if fic.genre %} Genres: {{ fic.genre }}
      {% endif %} + {% if fic.language %} Language: {{ fic.language }}
      {% endif %} + {% if fic.series %} Series: {{ fic.series }}
      {% endif %} + {% if fic.characters %} Characters: {{ fic.characters }}
      {% endif %} + {% if fic.status %} Status: {{ fic.status }}
      {% endif %} + {% if fic.datePublished %} Published: {{ fic.datePublished }}
      {% endif %} + {% if fic.dateUpdated %} Last Updated: {{ fic.dateUpdated }}
      {% endif %} + {% if fic.dateCreated %} Last Downloaded: {{ fic.dateCreated }}
      {% endif %} + {% if fic.rating %} Rating: {{ fic.rating }}
      {% endif %} + {% if fic.warnings %} Warnings: {{ fic.warnings }}
      {% endif %} + {% if fic.description %} Summary: {{ fic.description }}
      {% endif %} +

      + {% endfor %} +
      + + + + +
      + + diff --git a/app.yaml b/app.yaml new file mode 100644 index 00000000..a0db5945 --- /dev/null +++ b/app.yaml @@ -0,0 +1,46 @@ +# ffd-retief-hrd fanfictiondownloader +application: fanfictiondownloader +version: 4-4-9 +runtime: python27 +api_version: 1 +threadsafe: true + +handlers: + +- url: /r3m0v3r.* + script: utils.remover.app + login: admin + +- url: /tally.* + script: utils.tally.app + login: admin + +- url: /fdownloadtask + script: main.app + login: admin + +- url: /css + static_dir: css + +- url: /js + static_dir: js + +- url: /static + static_dir: static + +- url: /favicon\.ico + static_files: static/favicon.ico + upload: static/favicon\.ico + +- url: /.* + script: main.app + +#builtins: +#- datastore_admin: on + +libraries: +- name: django + version: "1.2" + +- name: PIL + version: "1.1.7" diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py new file mode 100644 index 00000000..44a7dda2 --- /dev/null +++ b/calibre-plugin/__init__.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +# The class that all Interface Action plugin wrappers must inherit from +from calibre.customize import InterfaceActionBase + +## Apparently the name for this class doesn't matter--it was still +## 'demo' for the first few versions. +class FanFictionDownLoaderBase(InterfaceActionBase): + ''' + This class is a simple wrapper that provides information about the + actual plugin class. The actual interface plugin class is called + InterfacePlugin and is defined in the ffdl_plugin.py file, as + specified in the actual_plugin field below. + + The reason for having two classes is that it allows the command line + calibre utilities to run without needing to load the GUI libraries. + ''' + name = 'FanFictionDownLoader' + description = 'UI plugin to download FanFiction stories from various sites.' + supported_platforms = ['windows', 'osx', 'linux'] + author = 'Jim Miller' + version = (1, 5, 20) + minimum_calibre_version = (0, 8, 30) + + #: This field defines the GUI plugin class that contains all the code + #: that actually does something. Its format is module_path:class_name + #: The specified class must be defined in the specified module. + actual_plugin = 'calibre_plugins.fanfictiondownloader_plugin.ffdl_plugin:FanFictionDownLoaderPlugin' + + def is_customizable(self): + ''' + This method must return True to enable customization via + Preferences->Plugins + ''' + return True + + def config_widget(self): + ''' + Implement this method and :meth:`save_settings` in your plugin to + use a custom configuration dialog. + + This method, if implemented, must return a QWidget. The widget can have + an optional method validate() that takes no arguments and is called + immediately after the user clicks OK. Changes are applied if and only + if the method returns True. + + If for some reason you cannot perform the configuration at this time, + return a tuple of two strings (message, details), these will be + displayed as a warning dialog to the user and the process will be + aborted. + + The base class implementation of this method raises NotImplementedError + so by default no user configuration is possible. 
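+
+        A minimal sketch of a conforming widget (illustrative only; this
+        plugin's real widget is ConfigWidget in config.py):
+
+            from PyQt4.Qt import QWidget
+
+            class MinimalConfigWidget(QWidget):
+                def validate(self):
+                    # Illustrative: return False to keep the dialog open.
+                    return True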
+ ''' + # It is important to put this import statement here rather than at the + # top of the module as importing the config class will also cause the + # GUI libraries to be loaded, which we do not want when using calibre + # from the command line + from calibre_plugins.fanfictiondownloader_plugin.config import ConfigWidget + return ConfigWidget(self.actual_plugin_) + + def save_settings(self, config_widget): + ''' + Save the settings specified by the user with config_widget. + + :param config_widget: The widget returned by :meth:`config_widget`. + ''' + config_widget.save_settings() + + # Apply the changes + ac = self.actual_plugin_ + if ac is not None: + ac.apply_settings() + +# For testing, run from command line with this: +# calibre-debug -e __init__.py +# +if __name__ == '__main__': + from PyQt4.Qt import QApplication + from calibre.gui2.preferences import test_widget + app = QApplication([]) + test_widget('Advanced', 'Plugins') diff --git a/calibre-plugin/about.txt b/calibre-plugin/about.txt new file mode 100644 index 00000000..9ea9cd05 --- /dev/null +++ b/calibre-plugin/about.txt @@ -0,0 +1,28 @@ +
      + +

      Plugin created by Jim Miller, borrowing heavily from Grant Drake's +'Reading List', +'Extract ISBN' and +'Count Pages' +plugins.

      + +

      +Calibre officially distributes plugins from the mobileread.com forum site. +The official distro channel for this plugin is there: FanFictionDownLoader +

      + +

      I also monitor the +general users +group for the downloader. That covers the web application and CLI, too. +

+ +The source for this plugin is available at its +project home. +
      + +

      +See the list of supported sites. +

      +

      +Read the FAQs. +

      diff --git a/calibre-plugin/common_utils.py b/calibre-plugin/common_utils.py new file mode 100644 index 00000000..19e8697e --- /dev/null +++ b/calibre-plugin/common_utils.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +import os +from PyQt4 import QtGui +from PyQt4.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime) +from calibre.constants import iswindows +from calibre.gui2 import gprefs, error_dialog, UNDEFINED_QDATETIME +from calibre.gui2.actions import menu_action_unique_name +from calibre.gui2.keyboard import ShortcutConfig +from calibre.utils.config import config_dir +from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE + +# Global definition of our plugin name. Used for common functions that require this. +plugin_name = None +# Global definition of our plugin resources. Used to share between the xxxAction and xxxBase +# classes if you need any zip images to be displayed on the configuration dialog. +plugin_icon_resources = {} + + +def set_plugin_icon_resources(name, resources): + ''' + Set our global store of plugin name and icon resources for sharing between + the InterfaceAction class which reads them and the ConfigWidget + if needed for use on the customization dialog for this plugin. + ''' + global plugin_icon_resources, plugin_name + plugin_name = name + plugin_icon_resources = resources + + +def get_icon(icon_name): + ''' + Retrieve a QIcon for the named image from the zip file if it exists, + or if not then from Calibre's image cache. 
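+    Illustrative usage (the QAction 'ac' is an assumption; 'images/icon.png'
+    is the icon shipped in this plugin's zip):
+        ac.setIcon(get_icon('images/icon.png'))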
+ ''' + if icon_name: + pixmap = get_pixmap(icon_name) + if pixmap is None: + # Look in Calibre's cache for the icon + return QIcon(I(icon_name)) + else: + return QIcon(pixmap) + return QIcon() + + +def get_pixmap(icon_name): + ''' + Retrieve a QPixmap for the named image + Any icons belonging to the plugin must be prefixed with 'images/' + ''' + global plugin_icon_resources, plugin_name + + if not icon_name.startswith('images/'): + # We know this is definitely not an icon belonging to this plugin + pixmap = QPixmap() + pixmap.load(I(icon_name)) + return pixmap + + # Check to see whether the icon exists as a Calibre resource + # This will enable skinning if the user stores icons within a folder like: + # ...\AppData\Roaming\calibre\resources\images\Plugin Name\ + if plugin_name: + local_images_dir = get_local_images_dir(plugin_name) + local_image_path = os.path.join(local_images_dir, icon_name.replace('images/', '')) + if os.path.exists(local_image_path): + pixmap = QPixmap() + pixmap.load(local_image_path) + return pixmap + + # As we did not find an icon elsewhere, look within our zip resources + if icon_name in plugin_icon_resources: + pixmap = QPixmap() + pixmap.loadFromData(plugin_icon_resources[icon_name]) + return pixmap + return None + + +def get_local_images_dir(subfolder=None): + ''' + Returns a path to the user's local resources/images folder + If a subfolder name parameter is specified, appends this to the path + ''' + images_dir = os.path.join(config_dir, 'resources/images') + if subfolder: + images_dir = os.path.join(images_dir, subfolder) + if iswindows: + images_dir = os.path.normpath(images_dir) + return images_dir + + +def create_menu_item(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=(), triggered=None, is_checked=None): + ''' + Create a menu action with the specified criteria and action + Note that if no shortcut is specified, will not appear in Preferences->Keyboard + This method should only be used for actions which either have no shortcuts, + or register their menus only once. Use create_menu_action_unique for all else. 
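+    Hedged usage sketch ('ia', 'menu' and the handler name are assumptions,
+    not code from this plugin):
+        ac = create_menu_item(ia, menu, 'Download Story',
+                              image='images/icon.png',
+                              triggered=self.download_dialog)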
+ ''' + if shortcut is not None: + if len(shortcut) == 0: + shortcut = () + else: + shortcut = _(shortcut) + ac = ia.create_action(spec=(menu_text, None, tooltip, shortcut), + attr=menu_text) + if image: + ac.setIcon(get_icon(image)) + if triggered is not None: + ac.triggered.connect(triggered) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + + parent_menu.addAction(ac) + return ac + + +def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ''' + Create a menu action with the specified criteria and action, using the new + InterfaceAction.create_menu_action() function which ensures that regardless of + whether a shortcut is specified it will appear in Preferences->Keyboard + ''' + orig_shortcut = shortcut + kb = ia.gui.keyboard + if unique_name is None: + unique_name = menu_text + if not shortcut == False: + full_unique_name = menu_action_unique_name(ia, unique_name) + if full_unique_name in kb.shortcuts: + shortcut = False + else: + if shortcut is not None and not shortcut == False: + if len(shortcut) == 0: + shortcut = None + else: + shortcut = _(shortcut) + + if shortcut_name is None: + shortcut_name = menu_text.replace('&','') + + ac = ia.create_menu_action(parent_menu, unique_name, menu_text, icon=None, shortcut=shortcut, + description=tooltip, triggered=triggered, shortcut_name=shortcut_name) + if shortcut == False and not orig_shortcut == False: + if ac.calibre_shortcut_unique_name in ia.gui.keyboard.shortcuts: + kb.replace_action(ac.calibre_shortcut_unique_name, ac) + if image: + ac.setIcon(get_icon(image)) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + return ac + + +def swap_author_names(author): + if author.find(',') == -1: + return author + name_parts = author.strip().partition(',') + return name_parts[2].strip() + ' ' + name_parts[0] + + +def get_library_uuid(db): + try: + library_uuid = db.library_id + except: + library_uuid = '' + return library_uuid + + +class ImageLabel(QLabel): + + def __init__(self, parent, icon_name, size=16): + QLabel.__init__(self, parent) + pixmap = get_pixmap(icon_name) + self.setPixmap(pixmap) + self.setMaximumSize(size, size) + self.setScaledContents(True) + + +class ImageTitleLayout(QHBoxLayout): + ''' + A reusable layout widget displaying an image followed by a title + ''' + def __init__(self, parent, icon_name, title): + QHBoxLayout.__init__(self) + title_image_label = QLabel(parent) + pixmap = get_pixmap(icon_name) + if pixmap is None: + pixmap = get_pixmap('library.png') + # error_dialog(parent, _('Restart required'), + # _('You must restart Calibre before using this plugin!'), show=True) + else: + title_image_label.setPixmap(pixmap) + title_image_label.setMaximumSize(32, 32) + title_image_label.setScaledContents(True) + self.addWidget(title_image_label) + + title_font = QFont() + title_font.setPointSize(16) + shelf_label = QLabel(title, parent) + shelf_label.setFont(title_font) + self.addWidget(shelf_label) + self.insertStretch(-1) + + +class SizePersistedDialog(QDialog): + ''' + This dialog is a base class for any dialogs that want their size/position + restored when they are next opened. 
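+    Subclasses pass a unique preference name and call resize_dialog() once
+    their widgets are laid out (see KeyboardConfigDialog below). Illustrative
+    sketch; the dialog and pref name are assumptions:
+        class MyDialog(SizePersistedDialog):
+            def __init__(self, gui):
+                SizePersistedDialog.__init__(self, gui, 'my plugin:my dialog')
+                # ... build layout ...
+                self.resize_dialog()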
+ ''' + def __init__(self, parent, unique_pref_name): + QDialog.__init__(self, parent) + self.unique_pref_name = unique_pref_name + self.geom = gprefs.get(unique_pref_name, None) + self.finished.connect(self.dialog_closing) + + def resize_dialog(self): + if self.geom is None: + self.resize(self.sizeHint()) + else: + self.restoreGeometry(self.geom) + + def dialog_closing(self, result): + geom = bytearray(self.saveGeometry()) + gprefs[self.unique_pref_name] = geom + + +class ReadOnlyTableWidgetItem(QTableWidgetItem): + + def __init__(self, text): + if text is None: + text = '' + QTableWidgetItem.__init__(self, text, QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class RatingTableWidgetItem(QTableWidgetItem): + + def __init__(self, rating, is_read_only=False): + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, rating) + if is_read_only: + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + + +class DateTableWidgetItem(QTableWidgetItem): + + def __init__(self, date_read, is_read_only=False, default_to_today=False): + if date_read == UNDEFINED_DATE and default_to_today: + date_read = now() + if is_read_only: + QTableWidgetItem.__init__(self, format_date(date_read, None), QtGui.QTableWidgetItem.UserType) + self.setFlags(Qt.ItemIsSelectable|Qt.ItemIsEnabled) + else: + QTableWidgetItem.__init__(self, '', QtGui.QTableWidgetItem.UserType) + self.setData(Qt.DisplayRole, QDateTime(date_read)) + + +class NoWheelComboBox(QComboBox): + + def wheelEvent (self, event): + # Disable the mouse wheel on top of the combo box changing selection as plays havoc in a grid + event.ignore() + + +class CheckableTableWidgetItem(QTableWidgetItem): + + def __init__(self, checked=False, is_tristate=False): + QTableWidgetItem.__init__(self, '') + self.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable | Qt.ItemIsUserCheckable | Qt.ItemIsEnabled )) + if is_tristate: + self.setFlags(self.flags() | Qt.ItemIsTristate) + if checked: + self.setCheckState(Qt.Checked) + else: + if is_tristate and checked is None: + self.setCheckState(Qt.PartiallyChecked) + else: + self.setCheckState(Qt.Unchecked) + + def get_boolean_value(self): + ''' + Return a boolean value indicating whether checkbox is checked + If this is a tristate checkbox, a partially checked value is returned as None + ''' + if self.checkState() == Qt.PartiallyChecked: + return None + else: + return self.checkState() == Qt.Checked + + +class TextIconWidgetItem(QTableWidgetItem): + + def __init__(self, text, icon): + QTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyTextIconWidgetItem(ReadOnlyTableWidgetItem): + + def __init__(self, text, icon): + ReadOnlyTableWidgetItem.__init__(self, text) + if icon: + self.setIcon(icon) + + +class ReadOnlyLineEdit(QLineEdit): + + def __init__(self, text, parent): + if text is None: + text = '' + QLineEdit.__init__(self, text, parent) + self.setEnabled(False) + + +class KeyValueComboBox(QComboBox): + + def __init__(self, parent, values, selected_key): + QComboBox.__init__(self, parent) + self.values = values + self.populate_combo(selected_key) + + def populate_combo(self, selected_key): + self.clear() + selected_idx = idx = -1 + for key, value in self.values.iteritems(): + idx = idx + 1 + self.addItem(value) + if key == selected_key: + selected_idx = idx + self.setCurrentIndex(selected_idx) + + def selected_key(self): + for key, value in self.values.iteritems(): + if value == unicode(self.currentText()).strip(): 
+ return key + + +class CustomColumnComboBox(QComboBox): + + def __init__(self, parent, custom_columns, selected_column, initial_items=['']): + QComboBox.__init__(self, parent) + self.populate_combo(custom_columns, selected_column, initial_items) + + def populate_combo(self, custom_columns, selected_column, initial_items=['']): + self.clear() + self.column_names = initial_items + if len(initial_items) > 0: + self.addItems(initial_items) + selected_idx = 0 + for idx, value in enumerate(initial_items): + if value == selected_column: + selected_idx = idx + for key in sorted(custom_columns.keys()): + self.column_names.append(key) + self.addItem('%s (%s)'%(key, custom_columns[key]['name'])) + if key == selected_column: + selected_idx = len(self.column_names) - 1 + self.setCurrentIndex(selected_idx) + + def get_selected_column(self): + return self.column_names[self.currentIndex()] + + +class KeyboardConfigDialog(SizePersistedDialog): + ''' + This dialog is used to allow editing of keyboard shortcuts. + ''' + def __init__(self, gui, group_name): + SizePersistedDialog.__init__(self, gui, 'Keyboard shortcut dialog') + self.gui = gui + self.setWindowTitle('Keyboard shortcuts') + layout = QVBoxLayout(self) + self.setLayout(layout) + + self.keyboard_widget = ShortcutConfig(self) + layout.addWidget(self.keyboard_widget) + self.group_name = group_name + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.commit) + button_box.rejected.connect(self.reject) + layout.addWidget(button_box) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.initialize() + + def initialize(self): + self.keyboard_widget.initialize(self.gui.keyboard) + self.keyboard_widget.highlight_group(self.group_name) + + def commit(self): + self.keyboard_widget.commit() + self.accept() + + +class DateDelegate(QStyledItemDelegate): + ''' + Delegate for dates. Because this delegate stores the + format as an instance variable, a new instance must be created for each + column. This differs from all the other delegates. 
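+    Illustrative sketch (the table and column indexes are assumptions):
+        table.setItemDelegateForColumn(4, DateDelegate(table))
+        table.setItemDelegateForColumn(5, DateDelegate(table))  # separate instance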
+ ''' + def __init__(self, parent): + QStyledItemDelegate.__init__(self, parent) + self.format = 'dd MMM yyyy' + + def displayText(self, val, locale): + d = val.toDateTime() + if d <= UNDEFINED_QDATETIME: + return '' + return format_date(qt_to_dt(d, as_utc=False), self.format) + + def createEditor(self, parent, option, index): + qde = QStyledItemDelegate.createEditor(self, parent, option, index) + qde.setDisplayFormat(self.format) + qde.setMinimumDateTime(UNDEFINED_QDATETIME) + qde.setSpecialValueText(_('Undefined')) + qde.setCalendarPopup(True) + return qde + + def setEditorData(self, editor, index): + val = index.model().data(index, Qt.DisplayRole).toDateTime() + if val is None or val == UNDEFINED_QDATETIME: + val = now() + editor.setDateTime(val) + + def setModelData(self, editor, model, index): + val = editor.dateTime() + if val <= UNDEFINED_QDATETIME: + model.setData(index, UNDEFINED_QDATETIME, Qt.EditRole) + else: + model.setData(index, QDateTime(val), Qt.EditRole) diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py new file mode 100644 index 00000000..336eb937 --- /dev/null +++ b/calibre-plugin/config.py @@ -0,0 +1,675 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import traceback, copy + +from PyQt4.Qt import (QDialog, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QFont, + QTextEdit, QComboBox, QCheckBox, QPushButton, QTabWidget, QVariant, QScrollArea) + +from calibre.gui2 import dynamic, info_dialog +from calibre.utils.config import JSONConfig +from calibre.gui2.ui import get_gui + +from calibre_plugins.fanfictiondownloader_plugin.dialogs \ + import (UPDATE, UPDATEALWAYS, OVERWRITE, collision_order) + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters import getConfigSections + +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import ( get_library_uuid, KeyboardConfigDialog ) + +from calibre.gui2.complete import MultiCompleteLineEdit + +# This is where all preferences for this plugin will be stored +# Remember that this name (i.e. plugins/fanfictiondownloader_plugin) is also +# in a global namespace, so make it as unique as possible. +# You should always prefix your config file name with plugins/, +# so as to ensure you don't accidentally clobber a calibre config file +all_prefs = JSONConfig('plugins/fanfictiondownloader_plugin') + +# Set defaults used by all. Library specific settings continue to +# take from here.
+all_prefs.defaults['personal.ini'] = get_resources('plugin-example.ini') + +all_prefs.defaults['updatemeta'] = True +all_prefs.defaults['updatecover'] = False +all_prefs.defaults['keeptags'] = False +all_prefs.defaults['urlsfromclip'] = True +all_prefs.defaults['updatedefault'] = True +all_prefs.defaults['fileform'] = 'epub' +all_prefs.defaults['collision'] = OVERWRITE +all_prefs.defaults['deleteotherforms'] = False +all_prefs.defaults['adddialogstaysontop'] = False +all_prefs.defaults['includeimages'] = False + +all_prefs.defaults['send_lists'] = '' +all_prefs.defaults['read_lists'] = '' +all_prefs.defaults['addtolists'] = False +all_prefs.defaults['addtoreadlists'] = False +all_prefs.defaults['addtolistsonread'] = False + +all_prefs.defaults['gc_site_settings'] = {} +all_prefs.defaults['allow_gc_from_ini'] = True + +all_prefs.defaults['custom_cols'] = {} + +# The list of settings to copy from all_prefs or the previous library +# when config is called for the first time on a library. +copylist = ['personal.ini', + 'updatemeta', + 'updatecover', + 'keeptags', + 'urlsfromclip', + 'updatedefault', + 'fileform', + 'collision', + 'deleteotherforms', + 'adddialogstaysontop', + 'includeimages', + 'gc_site_settings', + 'allow_gc_from_ini'] + +# fake out so I don't have to change the prefs calls anywhere. The +# Java programmer in me is offended by op-overloading, but it's very +# tidy. +class PrefsFacade(): + def __init__(self,all_prefs): + self.all_prefs = all_prefs + self.lastlibid = None + + def _get_copylist_prefs(self,frompref): + return filter( lambda x : x[0] in copylist, frompref.items() ) + + def _get_prefs(self): + libraryid = get_library_uuid(get_gui().current_db) + if libraryid not in self.all_prefs: + if self.lastlibid == None: + self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs)) + else: + self.all_prefs[libraryid] = dict(self._get_copylist_prefs(self.all_prefs[self.lastlibid])) + + self.lastlibid = libraryid + + return self.all_prefs[libraryid] + + def _save_prefs(self,prefs): + libraryid = get_library_uuid(get_gui().current_db) + self.all_prefs[libraryid] = prefs + + def __getitem__(self,k): + prefs = self._get_prefs() + if k not in prefs: + # pulls from all_prefs.defaults automatically if not set + # in all_prefs + return self.all_prefs[k] + return prefs[k] + + def __setitem__(self,k,v): + prefs = self._get_prefs() + prefs[k]=v + self._save_prefs(prefs) + + # to be avoided--can cause unexpected results as possibly ancient + # all_pref settings may be pulled. 
+ def __delitem__(self,k): + prefs = self._get_prefs() + del prefs[k] + self._save_prefs(prefs) + +prefs = PrefsFacade(all_prefs) + +class ConfigWidget(QWidget): + + def __init__(self, plugin_action): + QWidget.__init__(self) + self.plugin_action = plugin_action + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('List of Supported Sites -- FAQs') + label.setOpenExternalLinks(True) + self.l.addWidget(label) + + tab_widget = QTabWidget(self) + self.l.addWidget(tab_widget) + + self.basic_tab = BasicTab(self, plugin_action) + tab_widget.addTab(self.basic_tab, 'Basic') + + self.personalini_tab = PersonalIniTab(self, plugin_action) + tab_widget.addTab(self.personalini_tab, 'personal.ini') + + self.readinglist_tab = ReadingListTab(self, plugin_action) + tab_widget.addTab(self.readinglist_tab, 'Reading Lists') + if 'Reading List' not in plugin_action.gui.iactions: + self.readinglist_tab.setEnabled(False) + + self.generatecover_tab = GenerateCoverTab(self, plugin_action) + tab_widget.addTab(self.generatecover_tab, 'Generate Cover') + if 'Generate Cover' not in plugin_action.gui.iactions: + self.generatecover_tab.setEnabled(False) + + self.columns_tab = ColumnsTab(self, plugin_action) + tab_widget.addTab(self.columns_tab, 'Custom Columns') + + self.other_tab = OtherTab(self, plugin_action) + tab_widget.addTab(self.other_tab, 'Other') + + + def save_settings(self): + + # basic + prefs['fileform'] = unicode(self.basic_tab.fileform.currentText()) + prefs['collision'] = unicode(self.basic_tab.collision.currentText()) + prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked() + prefs['updatecover'] = self.basic_tab.updatecover.isChecked() + prefs['keeptags'] = self.basic_tab.keeptags.isChecked() + prefs['urlsfromclip'] = self.basic_tab.urlsfromclip.isChecked() + prefs['updatedefault'] = self.basic_tab.updatedefault.isChecked() + prefs['deleteotherforms'] = self.basic_tab.deleteotherforms.isChecked() + prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked() + prefs['includeimages'] = self.basic_tab.includeimages.isChecked() + + if self.readinglist_tab: + # lists + prefs['send_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.send_lists_box.text()).split(',')))) + prefs['read_lists'] = ', '.join(map( lambda x : x.strip(), filter( lambda x : x.strip() != '', unicode(self.readinglist_tab.read_lists_box.text()).split(',')))) + # print("send_lists: %s"%prefs['send_lists']) + # print("read_lists: %s"%prefs['read_lists']) + prefs['addtolists'] = self.readinglist_tab.addtolists.isChecked() + prefs['addtoreadlists'] = self.readinglist_tab.addtoreadlists.isChecked() + prefs['addtolistsonread'] = self.readinglist_tab.addtolistsonread.isChecked() + + # personal.ini + ini = unicode(self.personalini_tab.ini.toPlainText()) + if ini: + prefs['personal.ini'] = ini + else: + # if they've removed everything, reset to default. 
+ prefs['personal.ini'] = get_resources('plugin-example.ini') + + # Generate Covers tab + gc_site_settings = {} + for (site,combo) in self.generatecover_tab.gc_dropdowns.iteritems(): + val = unicode(combo.itemData(combo.currentIndex()).toString()) + if val != 'none': + gc_site_settings[site] = val + #print("gc_site_settings[%s]:%s"%(site,gc_site_settings[site])) + prefs['gc_site_settings'] = gc_site_settings + prefs['allow_gc_from_ini'] = self.generatecover_tab.allow_gc_from_ini.isChecked() + + # Custom Columns tab + colsmap = {} + for (col,combo) in self.columns_tab.custcol_dropdowns.iteritems(): + val = unicode(combo.itemData(combo.currentIndex()).toString()) + if val != 'none': + colsmap[col] = val + #print("colsmap[%s]:%s"%(col,colsmap[col])) + prefs['custom_cols'] = colsmap + + def edit_shortcuts(self): + self.save_settings() + # Force the menus to be rebuilt immediately, so we have all our actions registered + self.plugin_action.rebuild_menus() + d = KeyboardConfigDialog(self.plugin_action.gui, self.plugin_action.action_spec[0]) + if d.exec_() == d.Accepted: + self.plugin_action.gui.keyboard.finalize() + +class BasicTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('These settings control the basic features of the plugin--downloading FanFiction.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + horz = QHBoxLayout() + label = QLabel('Default Output &Format:') + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + horz = QHBoxLayout() + label = QLabel('Default If Story Already Exists?') + label.setToolTip("What to do if there's already an existing story with the same title and author.") + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip('Overwrite will replace the existing story. Add New will create a new story with the same title and author.') + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox('Default Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update title, author, URL, tags, custom columns, etc for story in Calibre from web site.') + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + self.updatecover = QCheckBox('Update Cover when Updating Metadata?',self) + self.updatecover.setToolTip("Update cover image from EPUB when metadata is updated. 
(EPUB only.)\nDoesn't go looking for new images on 'Update Calibre Metadata Only'.") + self.updatecover.setChecked(prefs['updatecover']) + self.l.addWidget(self.updatecover) + + self.keeptags = QCheckBox('Keep Existing Tags when Updating Metadata?',self) + self.keeptags.setToolTip('Existing tags will be kept and any new tags added.\nCompleted and In-Progress tags will still be updated, if known.\nLast Updated tags will be updated if lastupdate in include_subject_tags.') + self.keeptags.setChecked(prefs['keeptags']) + self.l.addWidget(self.keeptags) + + self.urlsfromclip = QCheckBox('Take URLs from Clipboard?',self) + self.urlsfromclip.setToolTip('Prefill URLs from valid URLs in Clipboard when Adding New.') + self.urlsfromclip.setChecked(prefs['urlsfromclip']) + self.l.addWidget(self.urlsfromclip) + + self.updatedefault = QCheckBox('Default to Update when books selected?',self) + self.updatedefault.setToolTip('The top FanFictionDownLoader plugin button will start Update if\n'+ + 'books are selected. If unchecked, it will always bring up \'Add New\'.') + self.updatedefault.setChecked(prefs['updatedefault']) + self.l.addWidget(self.updatedefault) + + self.deleteotherforms = QCheckBox('Delete other existing formats?',self) + self.deleteotherforms.setToolTip('Check this to automatically delete all other ebook formats when updating an existing book.\nHandy if you have both a Nook(epub) and Kindle(mobi), for example.') + self.deleteotherforms.setChecked(prefs['deleteotherforms']) + self.l.addWidget(self.deleteotherforms) + + self.adddialogstaysontop = QCheckBox("Keep 'Add New from URL(s)' dialog on top?",self) + self.adddialogstaysontop.setToolTip("Instructs the OS and Window Manager to keep the 'Add New from URL(s)'\ndialog on top of all other windows. Useful for dragging URLs onto it.") + self.adddialogstaysontop.setChecked(prefs['adddialogstaysontop']) + self.l.addWidget(self.adddialogstaysontop) + + # this is a cheat to make it easier for users to realize there's a new include_images feature. + self.includeimages = QCheckBox("Include images in EPUBs?",self) + self.includeimages.setToolTip("Download and include images in EPUB stories. This is equivalent to adding:\n\n[epub]\ninclude_images:true\nkeep_summary_html:true\nmake_firstimage_cover:true\n\n ...to the top of personal.ini. 
Your settings in personal.ini will override this.") + self.includeimages.setChecked(prefs['includeimages']) + self.l.addWidget(self.includeimages) + + self.l.insertStretch(-1) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class PersonalIniTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel('These settings provide more detailed control over what metadata will be displayed inside the ebook as well as let you set is_adult and user/password for different sites.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.label = QLabel('personal.ini:') + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + try: + self.ini.setFont(QFont("Courier", + self.plugin_action.gui.font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(prefs['personal.ini']) + self.l.addWidget(self.ini) + + self.defaults = QPushButton('View Defaults', self) + self.defaults.setToolTip("View all of the plugin's configurable settings\nand their default settings.") + self.defaults.clicked.connect(self.show_defaults) + self.l.addWidget(self.defaults) + + # self.l.insertStretch(-1) + # let edit box fill the space. + + def show_defaults(self): + text = get_resources('plugin-defaults.ini') + ShowDefaultsIniDialog(self.windowIcon(),text,self).exec_() + +class ShowDefaultsIniDialog(QDialog): + + def __init__(self, icon, text, parent=None): + QDialog.__init__(self, parent) + self.resize(600, 500) + self.l = QVBoxLayout() + self.setLayout(self.l) + self.label = QLabel("Plugin Defaults (Read-Only)") + self.label.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + self.setWindowTitle(_('Plugin Defaults')) + self.setWindowIcon(icon) + self.l.addWidget(self.label) + + self.ini = QTextEdit(self) + self.ini.setToolTip("These are all of the plugin's configurable options\nand their default settings.") + try: + self.ini.setFont(QFont("Courier", + get_gui().font().pointSize()+1)); + except Exception as e: + print("Couldn't get font: %s"%e) + self.ini.setLineWrapMode(QTextEdit.NoWrap) + self.ini.setText(text) + self.ini.setReadOnly(True) + self.l.addWidget(self.ini) + + self.ok_button = QPushButton('OK', self) + self.ok_button.clicked.connect(self.hide) + self.l.addWidget(self.ok_button) + +class ReadingListTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + rl_plugin = plugin_action.gui.iactions['Reading List'] + reading_lists = rl_plugin.get_list_names() + except KeyError: + reading_lists= [] + + label = QLabel('These settings provide integration with the Reading List Plugin. Reading List can automatically send to devices and change custom columns. 
You have to create and configure the lists in Reading List to be useful.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.addtolists = QCheckBox('Add new/updated stories to "Send to Device" Reading List(s).',self) + self.addtolists.setToolTip('Automatically add new/updated stories to these lists in the Reading List plugin.') + self.addtolists.setChecked(prefs['addtolists']) + self.l.addWidget(self.addtolists) + + horz = QHBoxLayout() + label = QLabel('"Send to Device" Reading Lists') + label.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + horz.addWidget(label) + self.send_lists_box = MultiCompleteLineEdit(self) + self.send_lists_box.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + self.send_lists_box.update_items_cache(reading_lists) + self.send_lists_box.setText(prefs['send_lists']) + horz.addWidget(self.send_lists_box) + self.l.addLayout(horz) + + self.addtoreadlists = QCheckBox('Add new/updated stories to "To Read" Reading List(s).',self) + self.addtoreadlists.setToolTip('Automatically add new/updated stories to these lists in the Reading List plugin.\nAlso offers menu option to remove stories from the "To Read" lists.') + self.addtoreadlists.setChecked(prefs['addtoreadlists']) + self.l.addWidget(self.addtoreadlists) + + horz = QHBoxLayout() + label = QLabel('"To Read" Reading Lists') + label.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + horz.addWidget(label) + self.read_lists_box = MultiCompleteLineEdit(self) + self.read_lists_box.setToolTip("When enabled, new/updated stories will be automatically added to these lists.") + self.read_lists_box.update_items_cache(reading_lists) + self.read_lists_box.setText(prefs['read_lists']) + horz.addWidget(self.read_lists_box) + self.l.addLayout(horz) + + self.addtolistsonread = QCheckBox('Add stories back to "Send to Device" Reading List(s) when marked "Read".',self) + self.addtolistsonread.setToolTip('Menu option to remove from "To Read" lists will also add stories back to "Send to Device" Reading List(s)') + self.addtolistsonread.setChecked(prefs['addtolistsonread']) + self.l.addWidget(self.addtolistsonread) + + self.l.insertStretch(-1) + +class GenerateCoverTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + try: + gc_plugin = plugin_action.gui.iactions['Generate Cover'] + gc_settings = gc_plugin.get_saved_setting_names() + except KeyError: + gc_settings= [] + + label = QLabel('The Generate Cover plugin can create cover images for books using various metadata and configurations. If you have GC installed, FFDL can run GC on new downloads and metadata updates. Pick a GC setting by site or Default.') + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + scrollable = QScrollArea() + scrollcontent = QWidget() + scrollable.setWidget(scrollcontent) + scrollable.setWidgetResizable(True) + self.l.addWidget(scrollable) + + self.sl = QVBoxLayout() + scrollcontent.setLayout(self.sl) + + self.gc_dropdowns = {} + + sitelist = getConfigSections() + sitelist.sort() + sitelist.insert(0,u"Default") + for site in sitelist: + horz = QHBoxLayout() + label = QLabel(site) + if site == u"Default": + s = "On Metadata update, run Generate Cover with this setting, if not selected for specific site." 
+ else: + s = "On Metadata update, run Generate Cover with this setting for site (%s)."%site + + label.setToolTip(s) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.setToolTip(s) + dropdown.addItem('',QVariant('none')) + for setting in gc_settings: + dropdown.addItem(setting,QVariant(setting)) + self.gc_dropdowns[site] = dropdown + if site in prefs['gc_site_settings']: + dropdown.setCurrentIndex(dropdown.findData(QVariant(prefs['gc_site_settings'][site]))) + + horz.addWidget(dropdown) + self.sl.addLayout(horz) + + self.allow_gc_from_ini = QCheckBox('Allow generate_cover_settings from personal.ini to override.',self) + self.allow_gc_from_ini.setToolTip("The INI parameter generate_cover_settings allows you to choose a GC setting based on metadata rather than site,\nbut it's much more complex. generate_cover_settings is ignored when this is off.") + self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini']) + self.l.addWidget(self.allow_gc_from_ini) + + self.l.insertStretch(-1) + +class OtherTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel("These controls aren't plugin settings as such, but convenience buttons for setting Keyboard shortcuts and getting all the FanFictionDownLoader confirmation dialogs back again.") + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + keyboard_shortcuts_button = QPushButton('Keyboard shortcuts...', self) + keyboard_shortcuts_button.setToolTip(_( + 'Edit the keyboard shortcuts associated with this plugin')) + keyboard_shortcuts_button.clicked.connect(parent_dialog.edit_shortcuts) + self.l.addWidget(keyboard_shortcuts_button) + + reset_confirmation_button = QPushButton(_('Reset disabled &confirmation dialogs'), self) + reset_confirmation_button.setToolTip(_( + 'Reset all show me again dialogs for the FanFictionDownLoader plugin')) + reset_confirmation_button.clicked.connect(self.reset_dialogs) + self.l.addWidget(reset_confirmation_button) + + self.l.insertStretch(-1) + + def reset_dialogs(self): + for key in dynamic.keys(): + if key.startswith('fanfictiondownloader_') and key.endswith('_again') \ + and dynamic[key] is False: + dynamic[key] = True + info_dialog(self, _('Done'), + _('Confirmation dialogs have all been reset'), + show=True, + show_copy_button=False) + +permitted_values = { + 'int' : ['numWords','numChapters'], + 'float' : ['numWords','numChapters'], + 'bool' : ['status-C','status-I'], + 'datetime' : ['datePublished', 'dateUpdated', 'dateCreated'], + 'series' : ['series'], + 'enumeration' : ['category', + 'genre', + 'language', + 'series', + 'characters', + 'status', + 'datePublished', + 'dateUpdated', + 'dateCreated', + 'rating', + 'warnings', + 'numChapters', + 'numWords', + 'site', + 'storyId', + 'authorId', + 'extratags', + 'title', + 'storyUrl', + 'description', + 'author', + 'authorUrl', + 'formatname' + #,'formatext' # not useful information. + #,'siteabbrev' + #,'version' + ] + } +# no point copying the whole list. 
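# Editor's note (not part of the patch): the aliasing just below reuses the
# 'enumeration' list for the 'text' and 'comments' datatypes instead of
# copying it. A minimal sketch of how a configured column mapping is
# screened against permitted_values (valid_custom_col is a hypothetical
# helper; the lookup mirrors ColumnsTab and _update_metadata):
def valid_custom_col(col_lookup_name, meta_key, custom_columns):
    # custom_columns: lookup name -> column definition dict, as returned
    # by gui.library_view.model().custom_columns
    coldef = custom_columns.get(col_lookup_name)
    return coldef is not None and \
        meta_key in permitted_values.get(coldef['datatype'], [])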
+permitted_values['text'] = permitted_values['enumeration'] +permitted_values['comments'] = permitted_values['enumeration'] + +titleLabels = { + 'category':'Category', + 'genre':'Genre', + 'language':'Language', + 'status':'Status', + 'status-C':'Status:Completed', + 'status-I':'Status:In-Progress', + 'series':'Series', + 'characters':'Characters', + 'datePublished':'Published', + 'dateUpdated':'Updated', + 'dateCreated':'Packaged', + 'rating':'Rating', + 'warnings':'Warnings', + 'numChapters':'Chapters', + 'numWords':'Words', + 'site':'Site', + 'storyId':'Story ID', + 'authorId':'Author ID', + 'extratags':'Extra Tags', + 'title':'Title', + 'storyUrl':'Story URL', + 'description':'Summary', + 'author':'Author', + 'authorUrl':'Author URL', + 'formatname':'File Format', + 'formatext':'File Extension', + 'siteabbrev':'Site Abbrev', + 'version':'FFDL Version' + } + +class ColumnsTab(QWidget): + + def __init__(self, parent_dialog, plugin_action): + self.parent_dialog = parent_dialog + self.plugin_action = plugin_action + QWidget.__init__(self) + + self.l = QVBoxLayout() + self.setLayout(self.l) + + label = QLabel("If you have custom columns defined, they will be listed below. Choose a metadata value type to fill your columns automatically.") + label.setWordWrap(True) + self.l.addWidget(label) + self.l.addSpacing(5) + + self.custcol_dropdowns = {} + + custom_columns = self.plugin_action.gui.library_view.model().custom_columns + + for key, column in custom_columns.iteritems(): + + if column['datatype'] in permitted_values: + # print("\n============== %s ===========\n"%key) + # for (k,v) in column.iteritems(): + # print("column['%s'] => %s"%(k,v)) + horz = QHBoxLayout() + label = QLabel('%s(%s)'%(column['name'],key)) + label.setToolTip("Update this %s column with..."%column['datatype']) + horz.addWidget(label) + dropdown = QComboBox(self) + dropdown.addItem('',QVariant('none')) + for md in permitted_values[column['datatype']]: + dropdown.addItem(titleLabels[md],QVariant(md)) + self.custcol_dropdowns[key] = dropdown + if key in prefs['custom_cols']: + dropdown.setCurrentIndex(dropdown.findData(QVariant(prefs['custom_cols'][key]))) + if column['datatype'] == 'enumeration': + dropdown.setToolTip("Metadata values valid for this type of column.\nValues that aren't valid for this enumeration column will be ignored.") + else: + dropdown.setToolTip("Metadata values valid for this type of column.") + + horz.addWidget(dropdown) + self.l.addLayout(horz) + + self.l.insertStretch(-1) + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) diff --git a/calibre-plugin/dialogs.py b/calibre-plugin/dialogs.py new file mode 100644 index 00000000..87c8e415 --- /dev/null +++ b/calibre-plugin/dialogs.py @@ -0,0 +1,663 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Jim Miller' +__docformat__ = 'restructuredtext en' + +import traceback + +from PyQt4 import QtGui +from PyQt4.Qt import (QDialog, QTableWidget, QMessageBox, QVBoxLayout, QHBoxLayout, QGridLayout, + QPushButton, QProgressDialog, QString, QLabel, QCheckBox, QIcon, QTextCursor, + QTextEdit, QLineEdit, QInputDialog, QComboBox, QClipboard, QVariant, + QProgressDialog, QTimer, QDialogButtonBox, QPixmap, Qt, QAbstractItemView ) + +from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog +from calibre.gui2.dialogs.confirm_delete import confirm + +from calibre import confirm_config_name 
+from calibre.gui2 import dynamic + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters,writers,exceptions +from calibre_plugins.fanfictiondownloader_plugin.common_utils \ + import (ReadOnlyTableWidgetItem, ReadOnlyTextIconWidgetItem, SizePersistedDialog, + ImageTitleLayout, get_icon) + +SKIP='Skip' +ADDNEW='Add New Book' +UPDATE='Update EPUB if New Chapters' +UPDATEALWAYS='Update EPUB Always' +OVERWRITE='Overwrite if Newer' +OVERWRITEALWAYS='Overwrite Always' +CALIBREONLY='Update Calibre Metadata Only' +collision_order=[SKIP, + ADDNEW, + UPDATE, + UPDATEALWAYS, + OVERWRITE, + OVERWRITEALWAYS, + CALIBREONLY,] + +class NotGoingToDownload(Exception): + def __init__(self,error,icon='dialog_error.png'): + self.error=error + self.icon=icon + + def __str__(self): + return self.error + +class DroppableQTextEdit(QTextEdit): + def __init__(self,parent): + QTextEdit.__init__(self,parent) + + def canInsertFromMimeData(self, source): + if source.hasUrls(): + return True; + else: + return QTextEdit.canInsertFromMimeData(self,source) + + def insertFromMimeData(self, source): + if source.hasText(): + self.append(source.text()) + else: + return QTextEdit.insertFromMimeData(self, source) + +class AddNewDialog(SizePersistedDialog): + + def __init__(self, gui, prefs, icon, url_list_text): + SizePersistedDialog.__init__(self, gui, 'FanFictionDownLoader plugin:add new dialog') + self.gui = gui + + if prefs['adddialogstaysontop']: + QDialog.setWindowFlags ( self, Qt.Dialog|Qt.WindowStaysOnTopHint ) + + self.setMinimumWidth(300) + self.l = QVBoxLayout() + self.setLayout(self.l) + + self.setWindowTitle('FanFictionDownLoader') + self.setWindowIcon(icon) + + self.l.addWidget(QLabel('Story URL(s), one per line:')) + self.url = DroppableQTextEdit(self) + self.url.setToolTip('URLs for stories, one per line.\nWill take URLs from clipboard, but only valid URLs.') + self.url.setLineWrapMode(QTextEdit.NoWrap) + self.url.setText(url_list_text) + self.l.addWidget(self.url) + + horz = QHBoxLayout() + label = QLabel('Output &Format:') + horz.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. 
May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + + label.setBuddy(self.fileform) + horz.addWidget(self.fileform) + self.l.addLayout(horz) + + horz = QHBoxLayout() + label = QLabel('If Story Already Exists?') + label.setToolTip("What to do if there's already an existing story with the same title and author.") + horz.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip(OVERWRITE+' will replace the existing story.\n'+ + # UPDATE+' will download new chapters only and add to existing EPUB.\n'+ + # ADDNEW+' will create a new story with the same title and author.\n'+ + # SKIP+' will not download existing stories.\n'+ + # CALIBREONLY+' will not download stories, but will update Calibre metadata.') + label.setBuddy(self.collision) + horz.addWidget(self.collision) + self.l.addLayout(horz) + + self.updatemeta = QCheckBox('Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update metadata for story in Calibre from web site?') + self.updatemeta.setChecked(prefs['updatemeta']) + self.l.addWidget(self.updatemeta) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + self.l.addWidget(button_box) + + if url_list_text: + button_box.button(QDialogButtonBox.Ok).setFocus() + + # restore saved size. + self.resize_dialog() + #self.resize(self.sizeHint()) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]: + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def get_ffdl_options(self): + return { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + } + + def get_urlstext(self): + return unicode(self.url.toPlainText()) + + +class FakeLineEdit(): + def __init__(self): + pass + + def text(self): + pass + +class UserPassDialog(QDialog): + ''' + Need to collect User/Pass for some sites. + ''' + def __init__(self, gui, site, exception=None): + QDialog.__init__(self, gui) + self.gui = gui + self.status=False + + self.l = QGridLayout() + self.setLayout(self.l) + + if exception.passwdonly: + self.setWindowTitle('Password') + self.l.addWidget(QLabel("Author requires a password for this story(%s)."%exception.url),0,0,1,2) + # user isn't used, but it's easier to still have it for + # post processing. 
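# Editor's note: FakeLineEdit (defined above) duck-types just enough of
# QLineEdit, a text() method returning None, so callers can read the
# result the same way for both dialog variants. Roughly how
# get_metadata_for_book() uses it later in this patch:
#
#     userpass = UserPassDialog(gui, url, exception)
#     userpass.exec_()                              # modal
#     if userpass.status:
#         adapter.username = userpass.user.text()   # None if password-only
#         adapter.password = userpass.passwd.text()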
+ self.user = FakeLineEdit() + else: + self.setWindowTitle('User/Password') + self.l.addWidget(QLabel("%s requires you to login to download this story."%site),0,0,1,2) + + self.l.addWidget(QLabel("User:"),1,0) + self.user = QLineEdit(self) + self.l.addWidget(self.user,1,1) + + self.l.addWidget(QLabel("Password:"),2,0) + self.passwd = QLineEdit(self) + self.passwd.setEchoMode(QLineEdit.Password) + self.l.addWidget(self.passwd,2,1) + + self.ok_button = QPushButton('OK', self) + self.ok_button.clicked.connect(self.ok) + self.l.addWidget(self.ok_button,3,0) + + self.cancel_button = QPushButton('Cancel', self) + self.cancel_button.clicked.connect(self.cancel) + self.l.addWidget(self.cancel_button,3,1) + + self.resize(self.sizeHint()) + + def ok(self): + self.status=True + self.hide() + + def cancel(self): + self.status=False + self.hide() + +class LoopProgressDialog(QProgressDialog): + ''' + ProgressDialog displayed while fetching metadata for each story. + ''' + def __init__(self, gui, + book_list, + foreach_function, + finish_function, + init_label="Fetching metadata for stories...", + win_title="Downloading metadata for stories", + status_prefix="Fetched metadata for"): + QProgressDialog.__init__(self, + init_label, + QString(), 0, len(book_list), gui) + self.setWindowTitle(win_title) + self.setMinimumWidth(500) + self.gui = gui + self.book_list = book_list + self.foreach_function = foreach_function + self.finish_function = finish_function + self.status_prefix = status_prefix + self.i = 0 + + ## self.do_loop does QTimer.singleShot on self.do_loop also. + ## A weird way to do a loop, but that was the example I had. + QTimer.singleShot(0, self.do_loop) + self.exec_() + + def updateStatus(self): + self.setLabelText("%s %d of %d"%(self.status_prefix,self.i+1,len(self.book_list))) + self.setValue(self.i+1) + print(self.labelText()) + + def do_loop(self): + + if self.i == 0: + self.setValue(0) + + book = self.book_list[self.i] + try: + ## collision spec passed into getadapter by partial from ffdl_plugin + ## no retval only if it exists, but collision is SKIP + self.foreach_function(book) + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + print("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + self.updateStatus() + self.i += 1 + + if self.i >= len(self.book_list) or self.wasCanceled(): + return self.do_when_finished() + else: + QTimer.singleShot(0, self.do_loop) + + def do_when_finished(self): + self.hide() + self.gui = None + # Queues a job to process these books in the background. 
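# Editor's note: the QTimer.singleShot() chaining above is a GUI-safe
# substitute for a for-loop: one book is processed per event-loop pass, so
# the progress dialog repaints and Cancel stays responsive between books.
# The skeleton of the pattern, simplified from do_loop() above:
#
#     def do_loop(self):
#         book = self.book_list[self.i]
#         try:
#             self.foreach_function(book)        # one unit of work
#         except Exception as e:
#             book['good'] = False
#             book['comment'] = unicode(e)
#         self.i += 1
#         if self.i >= len(self.book_list) or self.wasCanceled():
#             self.do_when_finished()            # leave the 'loop'
#         else:
#             QTimer.singleShot(0, self.do_loop) # queue the next pass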
+ self.finish_function(self.book_list) + +class AboutDialog(QDialog): + + def __init__(self, parent, icon, text): + QDialog.__init__(self, parent) + self.resize(400, 250) + self.l = QGridLayout() + self.setLayout(self.l) + self.logo = QLabel() + self.logo.setMaximumWidth(110) + self.logo.setPixmap(QPixmap(icon.pixmap(100,100))) + self.label = QLabel(text) + self.label.setOpenExternalLinks(True) + self.label.setWordWrap(True) + self.setWindowTitle(_('About FanFictionDownLoader')) + self.setWindowIcon(icon) + self.l.addWidget(self.logo, 0, 0) + self.l.addWidget(self.label, 0, 1) + self.bb = QDialogButtonBox(self) + b = self.bb.addButton(_('OK'), self.bb.AcceptRole) + b.setDefault(True) + self.l.addWidget(self.bb, 2, 0, 1, -1) + self.bb.accepted.connect(self.accept) + +class IconWidgetItem(ReadOnlyTextIconWidgetItem): + def __init__(self, text, icon, sort_key): + ReadOnlyTextIconWidgetItem.__init__(self, text, icon) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class AuthorTableWidgetItem(ReadOnlyTableWidgetItem): + def __init__(self, text, sort_key): + ReadOnlyTableWidgetItem.__init__(self, text) + self.sort_key = sort_key + + #Qt uses a simple < check for sorting items, override this to use the sortKey + def __lt__(self, other): + return self.sort_key < other.sort_key + +class UpdateExistingDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + save_size_name='fanfictiondownloader_plugin:update list dialog'): + SizePersistedDialog.__init__(self, gui, save_size_name) + self.gui = gui + + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + books_layout = QHBoxLayout() + layout.addLayout(books_layout) + + self.books_table = StoryListTableWidget(self) + books_layout.addWidget(self.books_table) + + button_layout = QVBoxLayout() + books_layout.addLayout(button_layout) + # self.move_up_button = QtGui.QToolButton(self) + # self.move_up_button.setToolTip('Move selected books up the list') + # self.move_up_button.setIcon(QIcon(I('arrow-up.png'))) + # self.move_up_button.clicked.connect(self.books_table.move_rows_up) + # button_layout.addWidget(self.move_up_button) + spacerItem = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem) + self.remove_button = QtGui.QToolButton(self) + self.remove_button.setToolTip('Remove selected books from the list') + self.remove_button.setIcon(get_icon('list_remove.png')) + self.remove_button.clicked.connect(self.remove_from_list) + button_layout.addWidget(self.remove_button) + spacerItem1 = QtGui.QSpacerItem(20, 40, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) + button_layout.addItem(spacerItem1) + # self.move_down_button = QtGui.QToolButton(self) + # self.move_down_button.setToolTip('Move selected books down the list') + # self.move_down_button.setIcon(QIcon(I('arrow-down.png'))) + # self.move_down_button.clicked.connect(self.books_table.move_rows_down) + # button_layout.addWidget(self.move_down_button) + + options_layout = QHBoxLayout() + + label = QLabel('Output &Format:') + options_layout.addWidget(label) + self.fileform = QComboBox(self) + self.fileform.addItem('epub') + self.fileform.addItem('mobi') + self.fileform.addItem('html') + self.fileform.addItem('txt') + 
self.fileform.setCurrentIndex(self.fileform.findText(prefs['fileform'])) + self.fileform.setToolTip('Choose output format to create. May set default from plugin configuration.') + self.fileform.activated.connect(self.set_collisions) + label.setBuddy(self.fileform) + options_layout.addWidget(self.fileform) + + label = QLabel('Update Mode:') + label.setToolTip("What sort of update to perform. May set default from plugin configuration.") + options_layout.addWidget(label) + self.collision = QComboBox(self) + # add collision options + self.set_collisions() + i = self.collision.findText(prefs['collision']) + if i > -1: + self.collision.setCurrentIndex(i) + # self.collision.setToolTip('Overwrite will replace the existing story. Add New will create a new story with the same title and author.') + label.setBuddy(self.collision) + options_layout.addWidget(self.collision) + + self.updatemeta = QCheckBox('Update Calibre &Metadata?',self) + self.updatemeta.setToolTip('Update metadata for story in Calibre from web site? May set default from plugin configuration.') + self.updatemeta.setChecked(prefs['updatemeta']) + options_layout.addWidget(self.updatemeta) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def set_collisions(self): + prev=self.collision.currentText() + self.collision.clear() + for o in collision_order: + if o not in [ADDNEW,SKIP] and \ + (self.fileform.currentText() == 'epub' or o not in [UPDATE,UPDATEALWAYS]): + self.collision.addItem(o) + i = self.collision.findText(prev) + if i > -1: + self.collision.setCurrentIndex(i) + + def remove_from_list(self): + self.books_table.remove_selected_rows() + + def get_books(self): + return self.books_table.get_books() + + def get_ffdl_options(self): + return { + 'fileform': unicode(self.fileform.currentText()), + 'collision': unicode(self.collision.currentText()), + 'updatemeta': self.updatemeta.isChecked(), + } + +def display_story_list(gui, header, prefs, icon, books, + label_text='', + save_size_name='fanfictiondownloader_plugin:display list dialog', + offer_skip=False): + all_good = True + for b in books: + if not b['good']: + all_good=False + break + + ## + if all_good and not dynamic.get(confirm_config_name(save_size_name), True): + return True + pass + ## fake accept? 
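# Editor's note: the early return above is what makes "Show this again?"
# work: calibre's confirm() stores the checkbox state in the dynamic prefs
# under confirm_config_name(name), and DisplayStoryListDialog.toggle()
# (below) writes the same key. Reduced to a predicate, with a hypothetical
# helper name:
def should_show_list(save_size_name, all_good):
    # show the dialog unless every book is good AND the user opted out
    return not (all_good and
                not dynamic.get(confirm_config_name(save_size_name), True))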
+ d = DisplayStoryListDialog(gui, header, prefs, icon, books, + label_text, + save_size_name, + offer_skip and all_good) + d.exec_() + return d.result() == d.Accepted + +class DisplayStoryListDialog(SizePersistedDialog): + def __init__(self, gui, header, prefs, icon, books, + label_text='', + save_size_name='fanfictiondownloader_plugin:display list dialog', + offer_skip=False): + SizePersistedDialog.__init__(self, gui, save_size_name) + self.name = save_size_name + self.gui = gui + + self.setWindowTitle(header) + self.setWindowIcon(icon) + + layout = QVBoxLayout(self) + self.setLayout(layout) + title_layout = ImageTitleLayout(self, 'images/icon.png', + header) + layout.addLayout(title_layout) + + self.books_table = StoryListTableWidget(self) + layout.addWidget(self.books_table) + + options_layout = QHBoxLayout() + self.label = QLabel(label_text) + #self.label.setOpenExternalLinks(True) + #self.label.setWordWrap(True) + options_layout.addWidget(self.label) + + if offer_skip: + spacerItem1 = QtGui.QSpacerItem(2, 4, QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Minimum) + options_layout.addItem(spacerItem1) + self.again = QCheckBox('Show this again?',self) + self.again.setChecked(True) + self.again.stateChanged.connect(self.toggle) + self.again.setToolTip('Uncheck to skip review and update stories immediately when no problems.') + options_layout.addWidget(self.again) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + + options_layout.addWidget(button_box) + + layout.addLayout(options_layout) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.books_table.populate_table(books) + + def get_books(self): + return self.books_table.get_books() + + def toggle(self, *args): + dynamic[confirm_config_name(self.name)] = self.again.isChecked() + + + +class StoryListTableWidget(QTableWidget): + + def __init__(self, parent): + QTableWidget.__init__(self, parent) + self.setSelectionBehavior(QAbstractItemView.SelectRows) + + def populate_table(self, books): + self.clear() + self.setAlternatingRowColors(True) + self.setRowCount(len(books)) + header_labels = ['','Title', 'Author', 'URL', 'Comment'] + self.setColumnCount(len(header_labels)) + self.setHorizontalHeaderLabels(header_labels) + self.horizontalHeader().setStretchLastSection(True) + #self.verticalHeader().setDefaultSectionSize(24) + self.verticalHeader().hide() + + self.books={} + for row, book in enumerate(books): + self.populate_table_row(row, book) + self.books[row] = book + + # turning True breaks up/down. Do we need either sorting or up/down? 
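# Editor's note: sorting is enabled just below, so view rows stop matching
# insertion order. populate_table_row() therefore stores each book's
# original index under Qt.UserRole on the title item, and get_books() maps
# the visible order back to the self.books dict; the core of that lookup:
#
#     rnum = self.item(row, 1).data(Qt.UserRole).toPyObject()
#     book = self.books[rnum]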
+ self.setSortingEnabled(True) + self.resizeColumnsToContents() + self.setMinimumColumnWidth(1, 100) + self.setMinimumColumnWidth(2, 100) + self.setMinimumColumnWidth(3, 100) + self.setMinimumSize(300, 0) + # if len(books) > 0: + # self.selectRow(0) + self.sortItems(1) + self.sortItems(0) + + def setMinimumColumnWidth(self, col, minimum): + if self.columnWidth(col) < minimum: + self.setColumnWidth(col, minimum) + + def populate_table_row(self, row, book): + if book['good']: + icon = get_icon('ok.png') + val = 0 + else: + icon = get_icon('minus.png') + val = 1 + if 'icon' in book: + icon = get_icon(book['icon']) + + status_cell = IconWidgetItem(None,icon,val) + status_cell.setData(Qt.UserRole, QVariant(val)) + self.setItem(row, 0, status_cell) + + title_cell = ReadOnlyTableWidgetItem(book['title']) + title_cell.setData(Qt.UserRole, QVariant(row)) + self.setItem(row, 1, title_cell) + + self.setItem(row, 2, AuthorTableWidgetItem(book['author'], book['author_sort'])) + + url_cell = ReadOnlyTableWidgetItem(book['url']) + #url_cell.setData(Qt.UserRole, QVariant(book['url'])) + self.setItem(row, 3, url_cell) + + comment_cell = ReadOnlyTableWidgetItem(book['comment']) + #comment_cell.setData(Qt.UserRole, QVariant(book)) + self.setItem(row, 4, comment_cell) + + def get_books(self): + books = [] + #print("=========================\nbooks:%s"%self.books) + for row in range(self.rowCount()): + rnum = self.item(row, 1).data(Qt.UserRole).toPyObject() + book = self.books[rnum] + books.append(book) + return books + + def remove_selected_rows(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + message = '
<p>Are you sure you want to remove this book from the list?'
+        if len(rows) > 1:
+            message = '<p>
      Are you sure you want to remove the selected %d books from the list?'%len(rows) + if not confirm(message,'fanfictiondownloader_delete_item', self): + return + first_sel_row = self.currentRow() + for selrow in reversed(rows): + self.removeRow(selrow.row()) + if first_sel_row < self.rowCount(): + self.select_and_scroll_to_row(first_sel_row) + elif self.rowCount() > 0: + self.select_and_scroll_to_row(first_sel_row - 1) + + def select_and_scroll_to_row(self, row): + self.selectRow(row) + self.scrollToItem(self.currentItem()) + + def move_rows_up(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + first_sel_row = rows[0].row() + if first_sel_row <= 0: + return + # Workaround for strange selection bug in Qt which "alters" the selection + # in certain circumstances which meant move down only worked properly "once" + selrows = [] + for row in rows: + selrows.append(row.row()) + selrows.sort() + for selrow in selrows: + self.swap_row_widgets(selrow - 1, selrow + 1) + scroll_to_row = first_sel_row - 1 + if scroll_to_row > 0: + scroll_to_row = scroll_to_row - 1 + self.scrollToItem(self.item(scroll_to_row, 0)) + + def move_rows_down(self): + self.setFocus() + rows = self.selectionModel().selectedRows() + if len(rows) == 0: + return + last_sel_row = rows[-1].row() + if last_sel_row == self.rowCount() - 1: + return + # Workaround for strange selection bug in Qt which "alters" the selection + # in certain circumstances which meant move down only worked properly "once" + selrows = [] + for row in rows: + selrows.append(row.row()) + selrows.sort() + for selrow in reversed(selrows): + self.swap_row_widgets(selrow + 2, selrow) + scroll_to_row = last_sel_row + 1 + if scroll_to_row < self.rowCount() - 1: + scroll_to_row = scroll_to_row + 1 + self.scrollToItem(self.item(scroll_to_row, 0)) + + def swap_row_widgets(self, src_row, dest_row): + self.blockSignals(True) + self.insertRow(dest_row) + for col in range(0, self.columnCount()): + self.setItem(dest_row, col, self.takeItem(src_row, col)) + self.removeRow(src_row) + self.blockSignals(False) diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py new file mode 100644 index 00000000..a2d2e811 --- /dev/null +++ b/calibre-plugin/ffdl_plugin.py @@ -0,0 +1,1047 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Jim Miller' +__docformat__ = 'restructuredtext en' + +import time, os, copy, threading, re +from ConfigParser import SafeConfigParser +from StringIO import StringIO +from functools import partial +from datetime import datetime +from string import Template + +from PyQt4.Qt import (QApplication, QMenu, QToolButton) + +from PyQt4.Qt import QPixmap, Qt +from PyQt4.QtCore import QBuffer + + +from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory, remove_dir +from calibre.ebooks.metadata import MetaInformation, authors_to_string +from calibre.ebooks.metadata.meta import get_metadata +from calibre.gui2 import error_dialog, warning_dialog, question_dialog, info_dialog +from calibre.gui2.dialogs.message_box import ViewLog +from calibre.gui2.dialogs.confirm_delete import confirm +from calibre.utils.date import local_tz + +# The class that all interface action plugins must inherit from +from calibre.gui2.actions import InterfaceAction + +from calibre_plugins.fanfictiondownloader_plugin.common_utils 
import (set_plugin_icon_resources, get_icon, + create_menu_action_unique, get_library_uuid) + +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount + +from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values) +from calibre_plugins.fanfictiondownloader_plugin.dialogs import ( + AddNewDialog, UpdateExistingDialog, display_story_list, DisplayStoryListDialog, + LoopProgressDialog, UserPassDialog, AboutDialog, + OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, + NotGoingToDownload ) + +# because calibre immediately transforms html into zip and don't want +# to have an 'if html'. db.has_format is cool with the case mismatch, +# but if I'm doing it anyway... +formmapping = { + 'epub':'EPUB', + 'mobi':'MOBI', + 'html':'ZIP', + 'txt':'TXT' + } + +PLUGIN_ICONS = ['images/icon.png'] + +class FanFictionDownLoaderPlugin(InterfaceAction): + + name = 'FanFictionDownLoader' + + # Declare the main action associated with this plugin + # The keyboard shortcut can be None if you dont want to use a keyboard + # shortcut. Remember that currently calibre has no central management for + # keyboard shortcuts, so try to use an unusual/unused shortcut. + # (text, icon_path, tooltip, keyboard shortcut) + # icon_path isn't in the zip--icon loaded below. + action_spec = (name, None, + 'Download FanFiction stories from various web sites', ()) + # None for keyboard shortcut doesn't allow shortcut. () does, there just isn't one yet + + action_type = 'global' + # make button menu drop down only + #popup_type = QToolButton.InstantPopup + + def genesis(self): + + # This method is called once per plugin, do initial setup here + + # Read the plugin icons and store for potential sharing with the config widget + icon_resources = self.load_resources(PLUGIN_ICONS) + set_plugin_icon_resources(self.name, icon_resources) + + base = self.interface_action_base_plugin + self.version = base.name+" v%d.%d.%d"%base.version + + # Set the icon for this interface action + # The get_icons function is a builtin function defined for all your + # plugin code. It loads icons from the plugin zip file. It returns + # QIcon objects, if you want the actual data, use the analogous + # get_resources builtin function. + + # Note that if you are loading more than one icon, for performance, you + # should pass a list of names to get_icons. In this case, get_icons + # will return a dictionary mapping names to QIcons. Names that + # are not found in the zip file will result in null QIcons. + icon = get_icon('images/icon.png') + + #self.qaction.setText('FFDL') + + # The qaction is automatically created from the action_spec defined + # above + self.qaction.setIcon(icon) + + # Call function when plugin triggered. + self.qaction.triggered.connect(self.plugin_button) + + # Assign our menu to this action + self.menu = QMenu(self.gui) + self.old_actions_unique_map = {} + # menu_actions is just to keep a live reference to the menu + # items to prevent GC removing it. + self.menu_actions = [] + self.qaction.setMenu(self.menu) + self.menu.aboutToShow.connect(self.about_to_show_menu) + + self.menus_lock = threading.RLock() + + def initialization_complete(self): + # otherwise configured hot keys won't work until the menu's + # been displayed once. 
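# Editor's note: rebuild_menus() (below) runs at startup and again from
# menu.aboutToShow, so the menu always reflects current prefs. The
# unique-name maps exist so that when a rebuild drops a menu item, its
# keyboard shortcut can be unregistered; the cleanup at the end of
# rebuild_menus() amounts to:
#
#     for menu_id, unique_name in self.old_actions_unique_map.iteritems():
#         if menu_id not in self.actions_unique_map:
#             self.gui.keyboard.unregister_shortcut(unique_name)
#     self.old_actions_unique_map = self.actions_unique_map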
+ self.rebuild_menus() + + def about_to_show_menu(self): + self.rebuild_menus() + + def library_changed(self, db): + # We need to reset our menus after switching libraries + self.rebuild_menus() + + def rebuild_menus(self): + with self.menus_lock: + # Show the config dialog + # The config dialog can also be shown from within + # Preferences->Plugins, which is why the do_user_config + # method is defined on the base plugin class + do_user_config = self.interface_action_base_plugin.do_user_config + self.menu.clear() + self.actions_unique_map = {} + self.menu_actions = [] + self.add_action = self.create_menu_item_ex(self.menu, '&Add New from URL(s)', image='plus.png', + unique_name='Add New FanFiction Book(s) from URL(s)', + shortcut_name='Add New FanFiction Book(s) from URL(s)', + triggered=self.add_dialog ) + + self.update_action = self.create_menu_item_ex(self.menu, '&Update Existing FanFiction Book(s)', image='plusplus.png', + unique_name='Update Existing FanFiction Book(s)', + shortcut_name='Update Existing FanFiction Book(s)', + triggered=self.update_existing) + + if 'Reading List' in self.gui.iactions and (prefs['addtolists'] or prefs['addtoreadlists']) : + self.menu.addSeparator() + addmenutxt, rmmenutxt = None, None + if prefs['addtolists'] and prefs['addtoreadlists'] : + addmenutxt = 'Add to "To Read" and "Send to Device" Lists' + if prefs['addtolistsonread']: + rmmenutxt = 'Remove from "To Read" and add to "Send to Device" Lists' + else: + rmmenutxt = 'Remove from "To Read" Lists' + elif prefs['addtolists'] : + addmenutxt = 'Add Selected to "Send to Device" Lists' + elif prefs['addtoreadlists']: + addmenutxt = 'Add to "To Read" Lists' + rmmenutxt = 'Remove from "To Read" Lists' + + if addmenutxt: + self.add_send_action = self.create_menu_item_ex(self.menu, addmenutxt, image='plusplus.png', + unique_name=addmenutxt, + shortcut_name=addmenutxt, + triggered=partial(self.update_lists,add=True)) + + if rmmenutxt: + self.add_remove_action = self.create_menu_item_ex(self.menu, rmmenutxt, image='minusminus.png', + unique_name=rmmenutxt, + shortcut_name=rmmenutxt, + triggered=partial(self.update_lists,add=False)) + + # try: + # self.add_send_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # except: + # pass + # try: + # self.add_remove_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) + # except: + # pass + + self.menu.addSeparator() + self.get_list_action = self.create_menu_item_ex(self.menu, 'Get URLs from Selected Books', image='bookmarks.png', + unique_name='Get URLs from Selected Books', + shortcut_name='Get URLs from Selected Books', + triggered=self.get_list_urls) + + self.menu.addSeparator() + self.config_action = create_menu_action_unique(self, self.menu, '&Configure Plugin', shortcut=False, + image= 'config.png', + unique_name='Configure FanFictionDownLoader', + shortcut_name='Configure FanFictionDownLoader', + triggered=partial(do_user_config,parent=self.gui)) + + self.config_action = create_menu_action_unique(self, self.menu, '&About Plugin', shortcut=False, + image= 'images/icon.png', + unique_name='About FanFictionDownLoader', + shortcut_name='About FanFictionDownLoader', + triggered=self.about) + + # Before we finalize, make sure we delete any actions for menus that are no longer displayed + for menu_id, unique_name in self.old_actions_unique_map.iteritems(): + if menu_id not in self.actions_unique_map: + self.gui.keyboard.unregister_shortcut(unique_name) + self.old_actions_unique_map = self.actions_unique_map + 
self.gui.keyboard.finalize() + + def about(self): + # Get the about text from a file inside the plugin zip file + # The get_resources function is a builtin function defined for all your + # plugin code. It loads files from the plugin zip file. It returns + # the bytes from the specified file. + # + # Note that if you are loading more than one file, for performance, you + # should pass a list of names to get_resources. In this case, + # get_resources will return a dictionary mapping names to bytes. Names that + # are not found in the zip file will not be in the returned dictionary. + + text = get_resources('about.txt') + AboutDialog(self.gui,self.qaction.icon(),self.version + text).exec_() + + def create_menu_item_ex(self, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None): + ac = create_menu_action_unique(self, parent_menu, menu_text, image, tooltip, + shortcut, triggered, is_checked, shortcut_name, unique_name) + self.actions_unique_map[ac.calibre_shortcut_unique_name] = ac.calibre_shortcut_unique_name + self.menu_actions.append(ac) + return ac + + def plugin_button(self): + if len(self.gui.library_view.get_selected_ids()) > 0 and prefs['updatedefault']: + self.update_existing() + else: + self.add_dialog() + + def update_lists(self,add=True): + if len(self.gui.library_view.get_selected_ids()) > 0 and \ + (prefs['addtolists'] or prefs['addtoreadlists']) : + self._update_reading_lists(self.gui.library_view.get_selected_ids(),add) + + def get_list_urls(self): + if len(self.gui.library_view.get_selected_ids()) > 0: + book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) + + LoopProgressDialog(self.gui, + book_list, + partial(self._get_story_url_for_list, db=self.gui.current_db), + self._finish_get_list_urls, + init_label="Collecting URLs for stories...", + win_title="Get URLs for stories", + status_prefix="URL retrieved") + + def _get_story_url_for_list(self,book,db=None): + book['url'] = self._get_story_url(db,book['calibre_id']) + if book['url'] == None: + book['good']=False + else: + book['good']=True + + def _finish_get_list_urls(self, book_list): + url_list = [ x['url'] for x in book_list if x['good'] ] + if url_list: + d = ViewLog(_("List of URLs"),"\n".join(url_list),parent=self.gui) + d.setWindowIcon(get_icon('bookmarks.png')) + d.exec_() + else: + info_dialog(self.gui, _('List of URLs'), + _('No URLs found in selected books.'), + show=True, + show_copy_button=False) + + def add_dialog(self): + + #print("add_dialog()") + + url_list = self.get_urls_clip() + url_list_text = "\n".join(url_list) + + # self.gui is the main calibre GUI. It acts as the gateway to access + # all the elements of the calibre user interface, it should also be the + # parent of the dialog + # AddNewDialog just collects URLs, format and presents buttons. 
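# Editor's note: add_dialog() (above) seeds the URL box from the system
# clipboard via get_urls_clip() (defined a little further on), keeping only
# strings that _is_good_downloader_url() can normalize. A condensed sketch
# of that filter, with a hypothetical standalone name:
def urls_from_clipboard(is_good_url):
    # is_good_url: returns a normalized story URL or None,
    # like self._is_good_downloader_url
    text = unicode(QApplication.instance().clipboard().text())
    return [u for u in text.split() if is_good_url(u)]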
+ d = AddNewDialog(self.gui, + prefs, + self.qaction.icon(), + url_list_text, + ) + d.exec_() + if d.result() != d.Accepted: + return + + url_list = get_url_list(d.get_urlstext()) + add_books = self._convert_urls_to_books(url_list) + #print("add_books:%s"%add_books) + #print("options:%s"%d.get_ffdl_options()) + + options = d.get_ffdl_options() + options['version'] = self.version + print(self.version) + + self.start_downloads( options, add_books ) + + def update_existing(self): + if len(self.gui.library_view.get_selected_ids()) == 0: + return + #print("update_existing()") + + db = self.gui.current_db + book_list = map( partial(self._convert_id_to_book, good=False), self.gui.library_view.get_selected_ids() ) + #book_ids = self.gui.library_view.get_selected_ids() + + LoopProgressDialog(self.gui, + book_list, + partial(self._populate_book_from_calibre_id, db=self.gui.current_db), + self._update_existing_2, + init_label="Collecting stories for update...", + win_title="Get stories for updates", + status_prefix="URL retrieved") + + #books = self._convert_calibre_ids_to_books(db, book_ids) + #print("update books:%s"%books) + + def _update_existing_2(self,book_list): + + d = UpdateExistingDialog(self.gui, + 'Update Existing List', + prefs, + self.qaction.icon(), + book_list, + ) + d.exec_() + if d.result() != d.Accepted: + return + + update_books = d.get_books() + + #print("update_books:%s"%update_books) + #print("options:%s"%d.get_ffdl_options()) + # only if there's some good ones. + if 0 < len(filter(lambda x : x['good'], update_books)): + options = d.get_ffdl_options() + options['version'] = self.version + print(self.version) + self.start_downloads( options, update_books ) + + def get_urls_clip(self): + url_list = [] + if prefs['urlsfromclip']: + for url in unicode(QApplication.instance().clipboard().text()).split(): + if( self._is_good_downloader_url(url) ): + url_list.append(url) + return url_list + + def apply_settings(self): + # No need to do anything with perfs here, but we could. + prefs + + def start_downloads(self, options, books): + + #print("start_downloads:%s"%books) + + # create and pass temp dir. + tdir = PersistentTemporaryDirectory(prefix='fanfictiondownloader_') + options['tdir']=tdir + + self.gui.status_bar.show_message(_('Started fetching metadata for %s stories.'%len(books)), 3000) + + if 0 < len(filter(lambda x : x['good'], books)): + LoopProgressDialog(self.gui, + books, + partial(self.get_metadata_for_book, options = options), + partial(self.start_download_list, options = options)) + # LoopProgressDialog calls get_metadata_for_book for each 'good' story, + # get_metadata_for_book updates book for each, + # LoopProgressDialog calls start_download_list at the end which goes + # into the BG, or shows list if no 'good' books. + + def get_metadata_for_book(self,book, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + ''' + Update passed in book dict with metadata from website and + necessary data. To be called from LoopProgressDialog + 'loop'. Also pops dialogs for is adult, user/pass. + ''' + + # The current database shown in the GUI + # db is an instance of the class LibraryDatabase2 from database.py + # This class has many, many methods that allow you to do a lot of + # things. + db = self.gui.current_db + + fileform = options['fileform'] + collision = options['collision'] + updatemeta= options['updatemeta'] + + if not book['good']: + # book has already been flagged bad for whatever reason. 
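# Editor's note: throughout the plugin a story travels as a plain dict
# ('url', 'good', 'comment', 'icon', 'calibre_id', ...) mutated in place by
# each stage; failures mark the dict bad instead of raising, and later
# stages (like the return just below) skip bad books. The convention, as a
# hypothetical helper:
def mark_bad(book, why, icon='dialog_error.png'):
    # matches how NotGoingToDownload and _set_book_url_and_comment
    # record failures
    book['good'] = False
    book['comment'] = why
    book['icon'] = icon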
+ return + + url = book['url'] + print("url:%s"%url) + skip_date_update = False + + ## was self.ffdlconfig, but we need to be able to change it + ## when doing epub update. + ffdlconfig = SafeConfigParser() + ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) + ffdlconfig.readfp(StringIO(prefs['personal.ini'])) + adapter = adapters.getAdapter(ffdlconfig,url,fileform) + + options['personal.ini'] = prefs['personal.ini'] + if prefs['includeimages']: + # this is a cheat to make it easier for users. + options['personal.ini'] = '''[epub] +include_images:true +keep_summary_html:true +make_firstimage_cover:true +''' + options['personal.ini'] + + ## three tries, that's enough if both user/pass & is_adult needed, + ## or a couple tries of one or the other + for x in range(0,2): + try: + adapter.getStoryMetadataOnly() + except exceptions.FailedToLogin, f: + print("Login Failed, Need Username/Password.") + userpass = UserPassDialog(self.gui,url,f) + userpass.exec_() # exec_ will make it act modal + if userpass.status: + adapter.username = userpass.user.text() + adapter.password = userpass.passwd.text() + + except exceptions.AdultCheckRequired: + if question_dialog(self.gui, 'Are You Adult?', '
<p>
      '+ + "%s requires that you be an adult. Please confirm you are an adult in your locale:"%url, + show_copy_button=False): + adapter.is_adult=True + + # let other exceptions percolate up. + story = adapter.getStoryMetadataOnly() + writer = writers.getWriter(options['fileform'],adapter.config,adapter) + + book['all_metadata'] = story.getAllMetadata(removeallentities=True) + book['title'] = story.getMetadata("title", removeallentities=True) + book['author_sort'] = book['author'] = story.getMetadata("author", removeallentities=True) + book['publisher'] = story.getMetadata("site") + book['tags'] = writer.getTags() # getTags could be moved up into adapter now. Adapter didn't used to know the fileform + book['comments'] = stripHTML(story.getMetadata("description")) #, removeallentities=True) comments handles entities better. + book['series'] = story.getMetadata("series") + + # adapter.opener is the element with a threadlock. But del + # adapter.opener doesn't work--subproc fails when it tries + # to pull in the adapter object that hasn't been imported yet. + # book['adapter'] = adapter + + book['is_adult'] = adapter.is_adult + book['username'] = adapter.username + book['password'] = adapter.password + + book['icon'] = 'plus.png' + if story.getMetadataRaw('datePublished'): + # should only happen when an adapter is broken, but better to + # fail gracefully. + book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz) + book['timestamp'] = None # filled below if not skipped. + + if collision in (CALIBREONLY): + book['icon'] = 'metadata.png' + + # Dialogs should prevent this case now. + if collision in (UPDATE,UPDATEALWAYS) and fileform != 'epub': + raise NotGoingToDownload("Cannot update non-epub format.") + + book_id = None + + if book['calibre_id'] != None: + # updating an existing book. Update mode applies. + print("update existing id:%s"%book['calibre_id']) + book_id = book['calibre_id'] + # No handling needed: OVERWRITEALWAYS,CALIBREONLY + + # only care about collisions when not ADDNEW + elif collision != ADDNEW: + # 'new' book from URL. collision handling applies. + print("from URL") + + # find dups + mi = MetaInformation(story.getMetadata("title", removeallentities=True), + (story.getMetadata("author", removeallentities=True),)) # author is a list. + identicalbooks = db.find_identical_books(mi) + ## removed for being overkill. + # for ib in identicalbooks: + # # only *really* identical if URL matches, too. + # # XXX make an option? + # if self._get_story_url(db,ib) == url: + # identicalbooks.append(ib) + #print("identicalbooks:%s"%identicalbooks) + + if collision == SKIP and identicalbooks: + raise NotGoingToDownload("Skipping duplicate story.","list_remove.png") + + if len(identicalbooks) > 1: + raise NotGoingToDownload("More than one identical book--can't tell which to update/overwrite.","minusminus.png") + + ## changed: add new book when CALIBREONLY if none found. + if collision == CALIBREONLY and not identicalbooks: + collision = ADDNEW + options['collision'] = ADDNEW + # raise NotGoingToDownload("Not updating Calibre Metadata, no existing book to update.","search_delete_saved.png") + + if len(identicalbooks)>0: + book_id = identicalbooks.pop() + book['calibre_id'] = book_id + book['icon'] = 'edit-redo.png' + + if book_id != None and collision != ADDNEW: + if collision in (CALIBREONLY): + book['comment'] = 'Metadata collected.' + # don't need temp file created below. 
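# Editor's note: the UPDATE/UPDATEALWAYS checks just below reduce to
# comparing the chapter count read from the stored epub
# (get_dcsource_chaptercount) with the site's numChapters. A sketch of the
# decision, using a hypothetical helper:
def update_decision(collision, epub_chapters, site_chapters):
    if epub_chapters > site_chapters:
        return 'refuse'  # local epub is ahead of the site; needs Overwrite
    if epub_chapters == site_chapters and collision == UPDATE:
        return 'skip'    # nothing new; UPDATEALWAYS redoes it regardless
    return 'update'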
+ return + + ## newer/chaptercount checks are the same for both: + # Update epub, but only if more chapters. + if collision in (UPDATE,UPDATEALWAYS): # collision == UPDATE + # 'book' can exist without epub. If there's no existing epub, + # let it go and it will download it. + if db.has_format(book_id,fileform,index_is_id=True): + (epuburl,chaptercount) = \ + get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB', + index_is_id=True))) + urlchaptercount = int(story.getMetadata('numChapters')) + if chaptercount == urlchaptercount: + if collision == UPDATE: + raise NotGoingToDownload("Already contains %d chapters."%chaptercount,'edit-undo.png') + else: + # UPDATEALWAYS + skip_date_update = True + elif chaptercount > urlchaptercount: + raise NotGoingToDownload("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." % (chaptercount,urlchaptercount),'dialog_error.png') + + if collision == OVERWRITE and \ + db.has_format(book_id,formmapping[fileform],index_is_id=True): + # check make sure incoming is newer. + lastupdated=story.getMetadataRaw('dateUpdated').date() + fileupdated=datetime.fromtimestamp(os.stat(db.format_abspath(book_id, formmapping[fileform], index_is_id=True))[8]).date() + if fileupdated > lastupdated: + raise NotGoingToDownload("Not Overwriting, web site is not newer.",'edit-undo.png') + + # For update, provide a tmp file copy of the existing epub so + # it can't change underneath us. + if collision in (UPDATE,UPDATEALWAYS) and \ + db.has_format(book['calibre_id'],'EPUB',index_is_id=True): + tmp = PersistentTemporaryFile(prefix='old-%s-'%book['calibre_id'], + suffix='.epub', + dir=options['tdir']) + db.copy_format_to(book_id,fileform,tmp,index_is_id=True) + print("existing epub tmp:"+tmp.name) + book['epub_for_update'] = tmp.name + + if collision != CALIBREONLY and not skip_date_update: + # I'm half convinced this should be dateUpdated instead, but + # this behavior matches how epubs come out when imported + # dateCreated == packaged--epub/etc created. + book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz) + + if book['good']: # there shouldn't be any !'good' books at this point. + # if still 'good', make a temp file to write the output to. + tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'], + suffix='.'+options['fileform'], + dir=options['tdir']) + print("title:"+book['title']) + print("outfile:"+tmp.name) + book['outfile'] = tmp.name + + return + + def start_download_list(self,book_list, + options={'fileform':'epub', + 'collision':ADDNEW, + 'updatemeta':True}): + ''' + Called by LoopProgressDialog to start story downloads BG processing. + adapter_list is a list of tuples of (url,adapter) + ''' + #print("start_download_list:book_list:%s"%book_list) + + ## No need to BG process when CALIBREONLY! Fake it. + if options['collision'] in (CALIBREONLY): + class NotJob(object): + def __init__(self,result): + self.failed=False + self.result=result + notjob = NotJob(book_list) + self.download_list_completed(notjob,options=options) + return + + for book in book_list: + if book['good']: + break + else: + ## No good stories to try to download, go straight to + ## list. + d = DisplayStoryListDialog(self.gui, + 'Nothing to Download', + prefs, + self.qaction.icon(), + book_list, + label_text='None of the URLs/stories given can be/need to be downloaded.' 
+                                       )
+            d.exec_()
+            return
+
+        func = 'arbitrary_n'
+        cpus = self.gui.job_manager.server.pool_size
+        args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', 'do_download_worker',
+                (book_list, options, cpus)]
+        desc = 'Download FanFiction Book'
+        job = self.gui.job_manager.run_job(
+                self.Dispatcher(partial(self.download_list_completed,options=options)),
+                func, args=args,
+                description=desc)
+
+        self.gui.status_bar.show_message('Starting %d FanFictionDownLoads'%len(book_list),3000)
+
+    def _update_book(self,book,db=None,
+                     options={'fileform':'epub',
+                              'collision':ADDNEW,
+                              'updatemeta':True}):
+        print("add/update %s %s"%(book['title'],book['url']))
+        mi = self._make_mi_from_book(book)
+
+        if options['collision'] != CALIBREONLY:
+            self._add_or_update_book(book,options,prefs,mi)
+
+        if options['collision'] == CALIBREONLY or \
+                (options['updatemeta'] and book['good']):
+            self._update_metadata(db, book['calibre_id'], book, mi, options)
+
+    def _update_books_completed(self, book_list, options={}):
+
+        add_list = filter(lambda x : x['good'] and x['added'], book_list)
+        add_ids = [ x['calibre_id'] for x in add_list ]
+        update_list = filter(lambda x : x['good'] and not x['added'], book_list)
+        update_ids = [ x['calibre_id'] for x in update_list ]
+
+        if len(add_list):
+            ## even shows up added to searches. Nice.
+            self.gui.library_view.model().books_added(len(add_list))
+            self.gui.library_view.model().refresh_ids(add_ids)
+
+        if update_ids:
+            self.gui.library_view.model().refresh_ids(update_ids)
+
+        current = self.gui.library_view.currentIndex()
+        self.gui.library_view.model().current_changed(current, self.previous)
+        self.gui.tags_view.recount()
+
+        if self.gui.cover_flow:
+            self.gui.cover_flow.dataChanged()
+
+        self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.'%(len(update_list) + len(add_list))), 3000)
+
+        if len(update_list) + len(add_list) != len(book_list):
+            d = DisplayStoryListDialog(self.gui,
+                                       'Updates completed, final status',
+                                       prefs,
+                                       self.qaction.icon(),
+                                       book_list,
+                                       label_text='Stories have been added or updated in Calibre, some had additional problems.'
+ ) + d.exec_() + + print("all done, remove temp dir.") + remove_dir(options['tdir']) + + def download_list_completed(self, job, options={}): + if job.failed: + self.gui.job_exception(job, dialog_title='Failed to Download Stories') + return + + self.previous = self.gui.library_view.currentIndex() + db = self.gui.current_db + + if display_story_list(self.gui, + 'Downloads finished, confirm to update Calibre', + prefs, + self.qaction.icon(), + job.result, + label_text='Stories will not be added or updated in Calibre without confirmation.', + offer_skip=True): + + book_list = job.result + good_list = filter(lambda x : x['good'], book_list) + total_good = len(good_list) + + self.gui.status_bar.show_message(_('Adding/Updating %s books.'%total_good)) + + if total_good > 0: + LoopProgressDialog(self.gui, + good_list, + partial(self._update_book, options=options, db=self.gui.current_db), + partial(self._update_books_completed, options=options), + init_label="Updating calibre for stories...", + win_title="Update calibre for stories", + status_prefix="Updated") + + def _add_or_update_book(self,book,options,prefs,mi=None): + db = self.gui.current_db + + if mi == None: + mi = self._make_mi_from_book(book) + + book_id = book['calibre_id'] + if book_id == None: + book_id = db.create_book_entry(mi, + add_duplicates=True) + book['calibre_id'] = book_id + book['added'] = True + else: + book['added'] = False + + if not db.add_format_with_hooks(book_id, + options['fileform'], + book['outfile'], index_is_id=True): + book['comment'] = "Adding format to book failed for some reason..." + book['good']=False + book['icon']='dialog_error.png' + + if prefs['deleteotherforms']: + fmts = db.formats(book['calibre_id'], index_is_id=True).split(',') + for fmt in fmts: + if fmt != formmapping[options['fileform']]: + print("remove f:"+fmt) + db.remove_format(book['calibre_id'], fmt, index_is_id=True)#, notify=False + + if prefs['addtolists'] or prefs['addtoreadlists']: + self._update_reading_lists([book_id],add=True) + + return book_id + + def _update_metadata(self, db, book_id, book, mi, options): + if prefs['keeptags']: + old_tags = db.get_tags(book_id) + # remove old Completed/In-Progress only if there's a new one. + if 'Completed' in mi.tags or 'In-Progress' in mi.tags: + old_tags = filter( lambda x : x not in ('Completed', 'In-Progress'), old_tags) + # remove old Last Update tags if there are new ones. + if len(filter( lambda x : not x.startswith("Last Update"), mi.tags)) > 0: + old_tags = filter( lambda x : not x.startswith("Last Update"), old_tags) + # mi.tags needs to be list, but set kills dups. + mi.tags = list(set(list(old_tags)+mi.tags)) + + if 'langcode' in book['all_metadata']: + mi.languages=[book['all_metadata']['langcode']] + else: + # Set language english, but only if not already set. + oldmi = db.get_metadata(book_id,index_is_id=True) + if not oldmi.languages: + mi.languages=['eng'] + + if options['fileform'] == 'epub' and prefs['updatecover']: + existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True) + epubmi = get_metadata(existingepub,'EPUB') + if epubmi.cover_data[1] is not None: + db.set_cover(book_id, epubmi.cover_data[1]) + + # set author link if found. All current adapters have authorUrl. + if 'authorUrl' in book['all_metadata']: + autid=db.get_author_id(book['author']) + db.set_link_field_for_author(autid, unicode(book['all_metadata']['authorUrl']), + commit=False, notify=False) + + db.set_metadata(book_id,mi) + + # do configured column updates here. 
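# Editor's note: _update_metadata() (below) walks prefs['custom_cols'] and
# writes each mapped value with db.set_custom(..., commit=False), issuing a
# single db.commit() once all columns are set. A minimal restatement of the
# int/float branch, as a hypothetical helper:
def set_number_column(db, book_id, label, value):
    # numeric columns: strip thousands separators ("12,345" -> "12345")
    # before handing the string to calibre
    db.set_custom(book_id, unicode(value).replace(",", ""),
                  label=label, commit=False)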
+ #print("all_metadata: %s"%book['all_metadata']) + custom_columns = self.gui.library_view.model().custom_columns + + #print("prefs['custom_cols'] %s"%prefs['custom_cols']) + for col, meta in prefs['custom_cols'].iteritems(): + #print("setting %s to %s"%(col,meta)) + if col not in custom_columns: + print("%s not an existing column, skipping."%col) + continue + coldef = custom_columns[col] + if not meta.startswith('status-') and meta not in book['all_metadata'] or \ + meta.startswith('status-') and 'status' not in book['all_metadata']: + print("No value for %s, skipping."%meta) + continue + if meta not in permitted_values[coldef['datatype']]: + print("%s not a valid column type for %s, skipping."%(col,meta)) + continue + label = coldef['label'] + if coldef['datatype'] in ('enumeration','text','comments','datetime','series'): + db.set_custom(book_id, book['all_metadata'][meta], label=label, commit=False) + elif coldef['datatype'] in ('int','float'): + num = unicode(book['all_metadata'][meta]).replace(",","") + db.set_custom(book_id, num, label=label, commit=False) + elif coldef['datatype'] == 'bool' and meta.startswith('status-'): + if meta == 'status-C': + val = book['all_metadata']['status'] == 'Completed' + if meta == 'status-I': + val = book['all_metadata']['status'] == 'In-Progress' + db.set_custom(book_id, val, label=label, commit=False) + + db.commit() + + if 'Generate Cover' in self.gui.iactions: + + gc_plugin = self.gui.iactions['Generate Cover'] + setting_name = None + if prefs['allow_gc_from_ini']: + ffdlconfig = SafeConfigParser() + ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini"))) + ffdlconfig.readfp(StringIO(prefs['personal.ini'])) + adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform']) + + # template => regexp to match => GC Setting to use. + # generate_cover_settings: + # ${category} => Buffy:? the Vampire Slayer => Buffy + for line in adapter.getConfig('generate_cover_settings').splitlines(): + if "=>" in line: + (template,regexp,setting) = map( lambda x: x.strip(), line.split("=>") ) + value = Template(template).substitute(book['all_metadata']).encode('utf8') + print("%s(%s) => %s => %s"%(template,value,regexp,setting)) + if re.search(regexp,value): + setting_name = setting + break + + if setting_name: + print("Generate Cover Setting from generate_cover_settings(%s)"%line) + if setting_name not in gc_plugin.get_saved_setting_names(): + print("GC Name %s not found, discarding! (check personal.ini for typos)"%setting_name) + setting_name = None + + if not setting_name and book['all_metadata']['site'] in prefs['gc_site_settings']: + setting_name = prefs['gc_site_settings'][book['all_metadata']['site']] + + if not setting_name and 'Default' in prefs['gc_site_settings']: + setting_name = prefs['gc_site_settings']['Default'] + + if setting_name: + print("Running Generate Cover with settings %s."%setting_name) + realmi = db.get_metadata(book_id, index_is_id=True) + gc_plugin.generate_cover_for_book(realmi,saved_setting_name=setting_name) + + + def _get_clean_reading_lists(self,lists): + if lists == None or lists.strip() == "" : + return [] + else: + return filter( lambda x : x, map( lambda x : x.strip(), lists.split(',') ) ) + + def _update_reading_lists(self,book_ids,add=True): + try: + rl_plugin = self.gui.iactions['Reading List'] + except: + if prefs['addtolists'] or prefs['addtoreadlists']: + message="
You configured FanFictionDownLoader to automatically update Reading Lists, but you don't have the Reading List plugin installed anymore?
      " + confirm(message,'fanfictiondownloader_no_reading_list_plugin', self.gui) + return + + # XXX check for existence of lists, warning if not. + if prefs['addtoreadlists']: + if add: + addremovefunc = rl_plugin.add_books_to_list + else: + addremovefunc = rl_plugin.remove_books_from_list + + lists = self._get_clean_reading_lists(prefs['read_lists']) + if len(lists) < 1 : + message="
You configured FanFictionDownLoader to automatically update \"To Read\" Reading Lists, but you don't have any lists set?
      " + confirm(message,'fanfictiondownloader_no_read_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("add good read l:(%s)"%l) + addremovefunc(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="
You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?
      "%l + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + if prefs['addtolists'] and (add or (prefs['addtolistsonread'] and prefs['addtoreadlists']) ): + lists = self._get_clean_reading_lists(prefs['send_lists']) + if len(lists) < 1 : + message="
You configured FanFictionDownLoader to automatically update \"Send to Device\" Reading Lists, but you don't have any lists set?
      " + confirm(message,'fanfictiondownloader_no_send_lists', self.gui) + for l in lists: + if l in rl_plugin.get_list_names(): + #print("good send l:(%s)"%l) + rl_plugin.add_books_to_list(l, + book_ids, + display_warnings=False) + else: + if l != '': + message="
You configured FanFictionDownLoader to automatically update Reading List '%s', but you don't have a list of that name?
      "%l + confirm(message,'fanfictiondownloader_no_reading_list_%s'%l, self.gui) + + def _find_existing_book_id(self,db,book,matchurl=True): + mi = MetaInformation(book["title"],(book["author"],)) # author is a list. + identicalbooks = db.find_identical_books(mi) + if matchurl: # only *really* identical if URL matches, too. + for ib in identicalbooks: + if self._get_story_url(db,ib) == book['url']: + return ib + if identicalbooks: + return identicalbooks.pop() + return None + + def _make_mi_from_book(self,book): + mi = MetaInformation(book['title'],(book['author'],)) # author is a list. + mi.set_identifiers({'url':book['url']}) + mi.publisher = book['publisher'] + mi.tags = book['tags'] + #mi.languages = ['en'] # handled in _update_metadata so it can check for existing lang. + mi.pubdate = book['pubdate'] + mi.timestamp = book['timestamp'] + mi.comments = book['comments'] + mi.series = book['series'] + return mi + + + def _convert_urls_to_books(self, urls): + books = [] + uniqueurls = set() + for url in urls: + book = self._convert_url_to_book(url) + if book['url'] in uniqueurls: + book['good'] = False + book['comment'] = "Same story already included." + uniqueurls.add(book['url']) + books.append(book) + return books + + def _convert_url_to_book(self, url): + book = {} + book['good'] = True + book['calibre_id'] = None + book['title'] = 'Unknown' + book['author'] = 'Unknown' + book['author_sort'] = 'Unknown' + + book['comment'] = '' + book['url'] = '' + book['added'] = False + + self._set_book_url_and_comment(book,url) + return book + + def _convert_id_to_book(self, idval, good=True): + book = {} + book['good'] = good + book['calibre_id'] = idval + book['title'] = 'Unknown' + book['author'] = 'Unknown' + book['author_sort'] = 'Unknown' + + book['comment'] = '' + book['url'] = '' + book['added'] = False + + return book + + def _populate_book_from_calibre_id(self, book, db=None): + mi = db.get_metadata(book['calibre_id'], index_is_id=True) + #book = {} + book['good'] = True + book['calibre_id'] = mi.id + book['title'] = mi.title + book['author'] = authors_to_string(mi.authors) + book['author_sort'] = mi.author_sort + book['comment'] = '' + book['url'] = "" + book['added'] = False + + url = self._get_story_url(db,book['calibre_id']) + self._set_book_url_and_comment(book,url) + #return book + + def _set_book_url_and_comment(self,book,url): + if not url: + book['comment'] = "No story URL found." + book['good'] = False + book['icon'] = 'search_delete_saved.png' + else: + # get normalized url or None. + book['url'] = self._is_good_downloader_url(url) + if book['url'] == None: + book['url'] = url + book['comment'] = "URL is not a valid story URL." + book['good'] = False + book['icon']='dialog_error.png' + + def _get_story_url(self, db, book_id): + identifiers = db.get_identifiers(book_id,index_is_id=True) + if 'url' in identifiers: + # identifiers have :->| in url. + #print("url from book:"+identifiers['url'].replace('|',':')) + return identifiers['url'].replace('|',':') + else: + ## only epub has URL in it--at least where I can easily find it. 
+            if db.has_format(book_id,'EPUB',index_is_id=True):
+                existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
+                mi = get_metadata(existingepub,'EPUB')
+                identifiers = mi.get_identifiers()
+                if 'url' in identifiers:
+                    #print("url from epub:"+identifiers['url'].replace('|',':'))
+                    return identifiers['url'].replace('|',':')
+                # look for dc:source
+                return get_dcsource(existingepub)
+            return None
+
+    def _is_good_downloader_url(self,url):
+        # this is the accepted way to 'check for existence'? really?
+        try:
+            self.dummyconfig
+        except AttributeError:
+            self.dummyconfig = SafeConfigParser()
+        # pulling up an adapter is pretty low overhead.  If
+        # it fails, it's a bad url.
+        try:
+            adapter = adapters.getAdapter(self.dummyconfig,url)
+            url = adapter.url
+            del adapter
+            return url
+        except:
+            return None
+
+def get_url_list(urls):
+    def f(x):
+        if x.strip(): return True
+        else: return False
+    # set removes dups.
+    return set(filter(f,urls.strip().splitlines()))
+
diff --git a/calibre-plugin/images/icon.png b/calibre-plugin/images/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9715307dd4fe35c686b222262b796828856ff22
GIT binary patch
literal 24649
[... base85-encoded image data omitted ...]
zEebv~mcVa!+6#j8pTQ0IEW zdT5UZgk|8CygO&_;2N4E$EXZe0@lb8QsCo2*gR}zR-jG&od%qQJ(_t7Ja?D%sYnO(s zV8!Fmh7^}VXi(`{?-5k>PVbK5lyBhF=eqLDJFI^heRqiGhXqt9KJzNr9D~Qj?m2_Bh z&6f;oXyhi`vLHC;pwXo4a&B{i5+MWu^Z5K$(a_k$_CRS}$>|je0sHl*CE@b{-+cdB zcM%-MJDysI z5n$B%0~RNY^|@A&Q`q0{!T}p&xm*1SWy*DUGo21FyuXdh@ZwP*8pwGgj4``3Z-HYX zG8K_%%D5?*jm|k7cxvs>Rg(gK%NW}FPA2;X7g+9JAj}iB)6qFZF?-Yq$v8y@krD)o zo7WWc06tw36LUb!Sh#zwIg_?3Q91M;MAYJg$c4AGe$vPd-DmwXF>)RLC*F zDbT>lNkJHz+TdVyX?)@hk9Qec)X+kn)Mq3UJ3ZSSz&&2wPxp{Tu6#7*%ZuLRLY^oL zAj}eBW(UF!;8;SAg8*?RKarqE!(8t@NHUAK*PHdNslUjH`gsYmn6NSpDp8x z7uRLN&XAACp)VgclB4m5B!~NTB|DD@FL6(l!6lw|dcZHt$Z0%iq=!$mP(ARxA!Sac zk?hj*Q|WdFR9dhKXuN=u_kM9w!K>vd|2PIcaQKoqeHplRg1h(%@bvIDUC~KlrfWi= z)dm*BXJhN71Vg7ALXP8AtJkq!Ly+0R$=U|$uk}fO2hExFS3?jgf(FAqmIShGFeY6I zoh^@I#bpMCKb-(~E@fj3&dH{0ENo~4s41jHjtC{KfuLSZt6xn!UpyvOfZlwn%q082 zguXPn-1rN4a|FBABZ2{7w$fkV{+;zu*xk)k&3KuPQ^QuGUxZQjXTAhHbEeqis0?GK zFycV1u^u!bP!&3oZ_-Iam3HwZ#R>anYZGSwUf|(NaF zYrcGO6;@Zl!u}wCV2S~*pF!mI2C|SOF(hP)hdm@FcaLucPK(Zpl}`+E@tW?~L%*I>;Ns{m=+N_3vfLpRvtEkNPQZLa$zO7){VE}{4z7$Ti5e)2R&lBj|osNb}foI}IH zr_CB?8KzbYL_!1Rxm`0L@AVsNp^PaIZ8l0v;Q z@8R|;X5CaRf!*Fgbg`rNmkyom7U*}{F8(jDmUSohg9buU=6U*8E}90gkU>KJBr8fJ zebL3)QdKBaaFR=l448PJ@9jLxGz~1Er+43?lTe42Z@d z5s*l!rZx6Q_oeU{$VDb3=IPoBCM@11Twx&MmM)(~>x=8h3$>0ABT*YS9cWQ|lu{pw!u{an3GZ zrpT~p7+o~MFe?G<=n=YR8{r4l&R6gq!an#kJ!n2guU|mqc+{+Tuvo>|DqphLvlWL& zS!McYcFRYSw9^QQx{vz>;DAlLU}YlQ77N#vI8Wq|>K%?kNecK4VEObM;R?wrcEm&r z+&HcO{5K|KnfYD261lJ9z>R0@AaCB$KkFG4REym$8x}dCiEhA|`zt-lm?mvtUDmCS zM1IW~ZQX7IdZ%g9By~@tpD(UnN05#+k1Ogt2zS4W12>OZKF|3KiJL??>M~g8CRk`c zQB(v07M7XA(f6T@*RW#S*@=aF;{_wJ&GclN1FZxv5q%r-jsU5zhAJb*f*a>WJI2qk z7A__~G#or(fhe*Lei}V9eC~D-I~(+5fyol%ureu1E|koq$HbWAsll(*13j!GJKHm4 zkfgzcs;?Imd$aD#_3hDF7+^!DYECON zxA+oqQI`=3xIk~71w%|y>z;4>o0Mz`+KHsg2hW_o;9{1>)RhGJPW$UO zv6X-|VW5LvV>eG!{63=+yEMjB(5_heD)$T|g{Ep;c&f|pq@43zE2_!<$_D5*))%w= zx{lQzEe^6Lw^1PW2dWV8;C=(isfXH2i~?Tgvnp&{AzIDXLFf#8bSjR#_%J2i-)7C; zBe1AFm0zGX>;4BC$_k!0SkHc3h%s-eE-r`IELf7aTa;?-sUXqzkXZtO(&(isNP-Ly zd08#<9=X4l%|6MWQZ~*gVC+&zu;P4;i(Yy}C`J4l|5w^!ouiR+ByoO|RlE;1mx(Y* zxxf@pW9$zBnR(FU?A7NPL1ovtm0nt29R0#-j|q=O&Sn@~RHAOoK8yyx1G`067X%?Z zYJ8kSMBh!soIZ6WmT&K)?;*c{BIp8r@Ytpd2|Lry<)$f_8V&=NX^xLRjduVLDW;4I z3fR)>%SVe-9|_-W%`VTB{(gGfjZ4%`POM|6Fw0F8g$K$D7!LvD*%htOKbwajgV-|_ zqRVv>p&=LaQhJ@k(9tcJ$fgC9Sa`&Wh$Y}7wFFEptz!x=gIgvhvlNR9{iPWe3=9W5k8#&Ksm!2eipJrpPOuNYdjTkcA)=J;Uu)q zjfSP7<4c;$3*;<2>jNQ0lCVp~FTTQ~!HN6UqWzLR)4MR~Qp!fG)f0se{)I(eh}IoQ z4O790SBQo$vUC?w$x9vhIT!>W_zCz4{3L`)kCd3E%#DDUsfT0|@JXY$5izbSlQa** z2G7E5gV?jX^}gQA??4C-J9fONo;@^5@_mbRYtfTbQr!?JcbPa{Zb@5=<7uB&jQE9@ zrXu;?o(=+39BwVk>;x9P$E|0p++Cj@+**vGy{iP>^yO$6gc&cLcmQJ<#^H^h09dD* z`o?%2Z17QEK{74~me84P%xfVsxtWQQ96G^nhP6bis*g!@7d_^NjZSzPm|*|v`GXfE zYpxuFM8BMCiiGp`>k=TT_XnzUU)2Hu1QZ2JC*c###wW9d^hV5q0LuFl8F5r!SWX_O ziHMnOf#7mRL(M{=dkEdJIwM)k?CxV8EWoEbcvcC!?PM*$i~o5hV?*O0L0-774trKN z4Mf^_#8E%hc>Z5z%7uTTurjZM@NC*@lmB)-&@;+;oix2tW>BKrwtc-6?gDi!4l%_#nfcV+<}!~<=_(a+=1UzD;QpoY1kGEUFYCx#Wo zfm{7JbpE-8mAG{~!Z;y<13tccbY$`boD~Iq?;h5Wx=Znd*68OwN@h#d-#lwt_H(Wy z@s8Hd+Sd%Kh#ag@A{@RBn|49Awd$M&ncB%|9{Bi)6q0WB}@=@{bm`?k%N zwoBgHrT1QNAkKPjbac~3zFl2!m0w6~&Xs|!MZ9?C@q)B~hVWzGF9^XZVQ6|J};%6GN)w$+p%eJXnnv)i42LHEQ@R5Aum+Y~|M%4f6;I{ftqb>)^R0v@9D z=S)o^=A~QTk9L3fd)fO*32j#IPGjY=4sBdUVfO)HVzyGaq$Az%vnW%hJ|}6|-!l&X zv-1w$+oLbNg_DJ%l=-}k{bf(ah}c{V*2O1$ksd|C!JfmQR)4I#gDT!diL$GbcXKb% zyZV%hC756hkEZLOgQyot=!r3qqu9@v~DpHxW=(}E+s%h0&0mwKzwi$66(H~rMbXfgHbrA4!f#n#C5Iid^9m|b*7gX&Wh zX-yoOE0r_9ZW49y`y~?!{VS%GTGI-$8#K!f8AzFmjh6sZ1evh!3@ML*nUJs~`BJ z3g>bq;UEeI51YjvVmTu6k$~R-FAmw7a1sYiN5p!dDze<%GNBJX9KFPRm;yqVjgu`f 
z4qgA`sEl;lI4JL^uG)%uzW!M{;r7~qxg@W^QtQFpdE=}Kfztv>&D}yLx!Fjr@Rv@i zCVSBVU0|`jSMxU<#j8wN#`33z_VB(W?P3%aO9vd)#w=joIz|K)!`-=Fd-2n_wL|%+ zt{)bS*r!2PQiy^CA>pFCtG|a~s<7#w;w-IIFNxeStv-pvvPPiAM4MUKLvb^X&c#op zvH98{;O;Y94MY%>4?mng>BxT1Xo-Zrd;aEs%ZbuyyB+jDu*t&CSjSRViak9zY)U;( zCGna5y6(@D1{R~nv0BU`!q~*K;*52zD6zWSNlZpXABgzm()n&wBGKyA3$1o;b`WKh zZ9k;QD$U0*$IjMu=NCO)j0xL-8bMr?Pw8JJz8=;-mDep3b*X`X1Js3pszIbO{UJ-< zIY@&{J1C+JqhLuLNCmJd^;Mhu;xSm0#zkXdh)|~?IwB>)EGoMY%UY&B>SX%u%8U{V zBd@sCbrW8C8B_*Qa4k?*YA};))AMjmtEA@KvfZv)q}uvawW=*UN5cHUUYG7FnYmA! zKvcjyno+0kots6*RNnx&Zx}a_CP0H2(FUvfzjE{@GqDZK$SM%=j;LjfBG|P-+@Jbg zP@;5ttLD;yu$T~&?Z}esGVJIeqeA>2F~)sQDfc_vFw|X>_s$x9MLFcTpu6|teiF_P zPv}PEOw_Mg?A8j=xsBnwz<4Bpetngxx}w}G-GneaUQ&~^dzhmi{C14ESY>do5l{qB zyD{yGpVaOP``2+(V(0i*l2;7|1_0S<_lM~Yr8baECf|IS$BOA$(2X5Z*(%O;jGU`^ zAn6#wFz+s?yM9uTFq#%#q+w+(Hdw@MO*(&f;g7`|2B0?-v3kTol*R}s8Z1K79p z<%uuHzU3WbkkvP0U1VKM^_oJOr*6$@8LWS%Zgq+(F-qBSpj(0Eb9^=ml7Q81wUKH= zvX;HkXOy3cL>@_JkQ-Vgp+KN;HvX#OgpsQwsK%1*hMCtfdX8=WRU4sNmuHx4B=ET9 zg1@?QA(tUuI~`J9MNV7D%M~MPL+1x8cSG4G|FUiQa zxeMLWHCJe8T;!%vG6jRCt4QsESu|%^NP%cN!RYy%d|ebHx#KVg+8l|(hr=dn(bbmH;*9CK$OioF; zQe+#37`!@F=O;MUZ^zIPgjo6mLu1gg`>|?f+ddBSXuy^-GNZRkV| zOw*P!tJ!7?-4yMZmniaK7^m${k9vekO*b=5bswV!143eviqe*?zKSu88sAUl$ent| zV56y@tn+m0ILgD{cx_ZN6zMA;Utp1Imw^yN`o$>>EyhxC!IHJp&Y}JyV)6&mijeBmx^BIDvfEybe;!bPY z-gFTD6dN(PE`yRNT}d>S+0YmKo+#cAGp`i1m8?#MNh_)S%JR#6>`Wx1QNT~SbUzz{ zCL7SSxyeVqTCcz}A?8_7{I!QpjuxS6Ob!~XMPKG7UxDD~UUY$22zE7qhNZp1!eqaU zKd)oZYJ-Rv{5mt-I5$YOuuw`rXILP@TE|gsY4vXd7u`~}{o?s4Uj4UzyuiO5jS#JO z#HVMUkc;_fD!_wEW+D z3Q4TKsqYEQ)F^p1GSf*-eExrTSc(x}*0Z;;k_kYq|5+mbf#H0o3*J8!Gdy~GHDlw{ z;+`tDL{>r0PUkHn=n9kR!Rgw^`=bJh$)5&mO*Nm|N2KRKbbr_Ya)xLkv*wn0mB9`Q z$4`$Rk8qGLx%t7Q=^LbuAuY?E$Zj`-hQ-Fn@ag0L0i_T-RbAJ(7t%g*QyXY8&_0=j z4L6W=s+D1w=s5BdH^ApT)pl;D_NL&bq^=__CQM)e+eRi^Fr< zatS!(H#K^jzUtCDK*>8N28u!Rr!k;%UlbLyooM~rNVn+PXv4O(7sx>Wo$&99IwNx2 zqb^3%ybqqSuN$jkaN3sxI8l{cm{x?l_Y9g>V1WLrg^%O1U8<_tKT1B{4_!f7YhU7< z8=9M7xNDA?s8a0^60{iXz>h6dk1$ihbGrVwa%_%zj0{=1x0-XiuU8@@PA4Z5x`3C? z8>xxAYaS_s7q-Ym#EwY#O~A_=!Eu^;(RtMn573t7qq6*$Z=9Shn|4WtT= zhlr?SRghk8mNx560&)p@7sXT4qfdLg z34CW(Mj{oFmN^~TomVFOr94h-)JvtMxr)(U3!kjuY!7mQPZp~-0A0dYLV1yN-NI+f z(KZcj9Bl(&q`KVT9cv%vzGwRG^0VSj9K`zgW-=0rEu8io?wYS&AZK!UM}j=urqb-x zm|&8@*_I^0SXX^OQROip4aniZo&4AH4}s$HTz?ks%w0nV+oc`n!Ro~Ww?k{|;_tke z)l2gePULA`o7b`5!>uW^Tk>{ZI9PM9SxlOl^cu?Hdos{G7hjEh&;+fN7EY@mH%AY7 zy=kaHxw=Z8F?vV`Bg03&U8p$P2&@uXl$@>qjuLh;_udr`RNR(d+33m%)0V~i$&P|C zf42`J)0~pX&;R3QWn-MVLv?vx?w}45LX3ed%Ha=0R(b;j)o~0tO9f`R68luZ$SI>SdL^~QXK7%klcHDf}v$T`?*y&;Vxp3YR+C2DM0W?NQAs+Sj z43_~(GIOA=P5s05M%v;PYdZ<&iRuR``5_U_e+ZPiilI!se z_a6$$H}CJG;TfwiWF`;jgQ+<>zjJfGU&N~IZM!zkXy3On<0YP!`Dw=`;9!UY*<9Se zys@BEic9l&hZf_rp`@UKor<)W7?-KZ9S!AJzci2gp0Y61@YQ`~{1OkN+ z%YotBQn>SDPG>|{=Q}AGao=;XSF5gFszSn|-@?e&&7Fy1v8Kn%PTUXXN*4LmXmkg> zCgw^$`f4mhwZF-eFQ9}Oe;pkYuFsAp4z9MRRLn&~Ea8Zx8$2~!MQ-;V|JE}`+b8C? 
z`pV_Z_xx7;EX{HkL7zdUP#MTXcwWX>U55|_p?HFsy@4?m)>(fakP~8(sB2hfCKgVs zdp0uDtO5$P!D~tA{qq{{o2&4J%K5viq#w)8r#1!biuyz4a}M!vLqla(6=VcV)e;pI zRI4tk2na4ChDs_bC)aiVpE%D}FkkK}-_n zSAF$qF8wIkOjoWbwgsnoR^ENX=bB{PXXeodrTf6aN<@Wk#0Vax4&1 zEWSpev;B@G(z+W9_qs3QJ_V-KYeH1OEEDQhjz76T`IgGd{SqlqbL+nLwk| zMUPdjjJ#PK@Q0Fxoe3CaF5P7fHIFWj_nDRjG&=VAizn=xXJ^nB4avYmcmXXlBbhAH zv?vBBZM_LUW&KXke%+s}j&bI1iETVpE{BMlTHeDR7Cm{!2DrHUF(RLi?gBM2b^epzU<3vRMoSWqPLI>XlUqt$8xXZR!7KcDpw+<()i@1r3e;M6Z; zNh58X9S!3|D_JB*?HY{yXtg^u z4btY522u*H^0bP;x($+Vq4QKj-e1Y8DKc2a`@5aNvW=hJN^hK!G>z15JIM5LU#UF$ z5`aoyz^mY$eSgkmf77vNh?fZcEpQyC2a^(FQR2shJNI!c#2voMAUl4~jKOMMw^0}z zWQ~eYi>xJ51Y{PQ2rHzkkzOum^Lp1F!@^<`Su{`s_fAZf(##{1N)3+DrpkJoMv1}7JN7D)8 zfP{D9<4LM((jzKnzNu>TJ?njU*{;x|cP#{0AIiweLf7GPA51$!^~Yieu)t;M`kLXz z+9(4Zo-FJFh(4Ub+8r$&m$WNd$kJu9NdmBe!Ao}4<|d34V@EJ@6Q{YP)k{HZYi5R3 zF|u;NM=8@m2V<1>VJMz2M~<+xW42c{;r6uNk%Ma|KM6Dd*u+5`S$kN@@re`25L z?txqpjJe5FS?Pbq$Rice29Cf|jfkQWSv!84Xib7trUddTeowz8$|NKiNIPw1>pX|O zS@(vcb`k(?Z?cw#sGx9_67|%^E77rw{4tH>lPeKL&Sdx=yQCaAMv7((hQYGj1S8>F zOIo{Q#2S{`lX zseX~{(C-ynP=DwpC7`g`FT%GNBvU!z=VfYJ6t7s2TP9ldMOju}D|UED{Fdfvb|XGS zWjaQ8tX0)?{}Z+`F}~I+#3TUiSAc{1DnWNm&13S9Ltu|wgC1L`liS$;KbQjiQ#s!MFa(ve?ZaRq2~JABfV0Xku69J z$|O-$sBpPp_)@uaF*FZ){fj8B)|b#~dmErd*nry^8nEmapGWc;hzu5Z6UM7qebTG;> z>Y!l5GI?84N8NRZ%-efbDH&|?ULcRT5X-B&bY{6Hsxbsw1}Xs&VF-a^GC5>DGC^FW zT-+nAFVg(sG&8IvCYs1rEW?Z<3F?^P1rJB|D0H;z%0a>)FX1e_&5!;GwV zYH>ZoTDj@3z>HgFEgd#vem1HZSi1<@`9fQ_8g5st;(RQ?~~gShU`v-#FM0*eaoR&TDS%G3`=CDk=Iv=Hajtvb@uh3_(EXQYQ}#Gja}af zfkrSF-+F5ja&1}B54}3L$Ccca+LS2&Bh9d zJnfpRJf}T9^+J**w`mz8l?r3zdt8AQN?qN0l0Fxe$gfR#0}j4V8RB5}92hHPH&@{| zQvkjIQ2&dYT0p;1PIg~$(LSqMxZwpNU2xH`hFzojG`|3Tj{N7;wTY^gU%Mr)dglnK z(WgPpc{DzdG6}ny`C`hISjjxM*capBLTxsjb=24Ca2GrPAxZJr;Us1-V~&|u5O6MC zKCVZ&b_>VMxEZUc-#uhIx%Q1hcd9QjT1xem_je)uqhkw#dL)oXy!)E=^S}L6&Y-c~ z7QQ009BS0QRXnTp3a7xrb^-OfW$yJa^O2r0_a;VlT%Q23fF@GV2HI4c{t_5$mV2&a zpfiP$K{BN86H?5ZGgemfM5z_I;jqr+K~#JCmwgLP?j!s#`=-q9B=L_6uKdc>_a$=1 zG5p+TE8I~$F?ackFP|+j?XLXDpR)D4v^pqJ-2`8s&;^k;O)RseS8x3hjZa{Lxzi(& zGe!l+_2Q6hb20V|oJvEfT|Ce8&~qPmzx@T2*g!2MwQM{;Av_#icJzmKB5s-i-ZaZ) zz?t1XxC0x&dwAMAA}|Lz>JbX?YT(dgs3To)dO|dd(M}k|_vn4xCW>oaA&j?Y8m%gQfP>czN44+oR}!o|THH!yJy+=x8xurzVDrw!Jf zZjLY!-!R6 zcE|!IF&4(N$*a(KJNMRPw>CDqEBG$v;Qi}qvQvV3aB}@80Zu43G7CaI)aA5F29lkx zu|fm&jwgXx7#3&R4%K)7|jh7Pr; z^>!0HfRLDY@osq}n9iDXsI(z8jcS$kZ2-R>Yn^i%4nUf$#$-z+!+_yY04HVeeAph; z&fnqgQn`oN;DtgT*Z-GWYc160p#L+k_y$FO=$`rqHM3%ti3f1zKVaRicZPA%vL*D= z=aKMW#B<&olkeH*2aAdn`)6-QSXc-i;7?|yONj%n@!njv!uj~wP1id_!k^<2f|O!W zjBn^a!A5^a5vud&7y97`>+j|VYZc^Gdao3PWIS6x0rBuR)AWOrs;jxLWBImfbiVSW zmVuzifV&rFxef_xRu%5~<*=1PEqM{3(DQIr`2A&XNzQ8`lCk&ErgdD80vXW<> zg&ub33r~O{oN?@gZ{I$PEj6q%3?LHT3Gb|@*l95$!zU5~D4eXu048i|>KVaa2mlWN zF9bj)@tbetPweRUnp%!nYj_0^_7E3@fU6QQe<^XR-PwpBcqAC~rck2RFd^+!Y&nugcEv0f^E*0SXX|>kAaKP@O>6=ZytYhBC^^qo1)D%4p z*71-&ZF^<>RM{B8xB*E)!4)z72*o0;5Ehbdh94y1zuY z5aebM;3B(5<)y#qQ$Qg|6)ed=YI4^s#p64k-Fcoi?Ij1XY}A{r*ug#_wmJl7yB0<{3BN3c8@-RcU&sEPH-d0xTauJO zDFBnMro3T(CqezzW5j81Y(I4%L?z;NKe$~my8k!THLXw>$Yp-~NWzfm8kq{`f^Cmv z8EE)wmGgQwB>c8$^aBz^1JDP{^{g3eRZ|))bH)O57J)aFc*Dj{@9NT?HisH1eBFB?SQoOScO^4<~ldMWv5wU@0oXp&;-+Pix zH)5Cy*5h{6JR!UYfH@~Adlw(pQ`R+B3AJ0KkP%V!{q?x9^JY@cu)%C)Ukf#{Z7nq4SjHlG7-31p?ZvHM?2vW#z*bZuAjYfw@#N~eHH#7)&&ry6!}Aeh2FvRr_u)n@c| z`f;qsv*x^`&}h);v(R23CJ4qv$;i^>e$NKL#rpW`e|%l=fuz4FMU=d1b}K7 z5TA7V$84k}5Rj%DXR{ppoX*qXaQNi<-*$CC6* zIpiqoP)Mp4?6CXms}FZH>RpPrrR9hZ;Dyy?-^ThL50-74|7w7(J^= zYSU6y7s?^~ml?}ft+g7bUAc{?OFSTQnA=Ar1DAXIZ(?syr&&J z5@?YT3J+RqJR|XYIZEIM+(4zu2szOf8-3iikXpoXt)>1TS*|@`BGdZ%3Bx8}gD(DlpUmX5nn@ 
zdyhEJ99aAx53?QkQBO7+k7LGmel(&cmXA?D^~D9e<*#h${nVlbMnlj5In(6Unl-dK zdqx#J*2^rcA|rD0i&Y%dmo!d5C_{}mLk0lD*?ukAZCDP z`@KVz-*a{a;ge*jPuANTMns<}q%<*2wA9GGGCmN6{*S&;-Gecur-g83Y2oAW%C}hm z{jLEiKoo@{OYMi9?bkyMmUEk*eM15%0$8A&s~a0V?-v&rL#C!w>VM>u^5{2Ged=eG z$0{$?XmsA9T}(YQUPvlzuDx|y=cy^#Z0;*1wU^2?+!eEY3xiHQ*3>lR!udrRO`+Jo zj|>0i+BK zkDLYDS0BeUu9s7AJ^*x?-!ys$4bVCQ$J}F##VII4af3Il4FGEgPo` zj_q$*JPsL@-7}+$NSoy!lLvi&SBO>UOFYW_(rz;#HPaUX*Sc$5Kr>A{KGsSG| z?16GA%%3Cjkupv2x@#E)JWm+=B|~u5(%3$C4-=95GuM!hFxk|r-Zv9YR#?Aga!1|W zo}?3<3j3Nqc>|Ql9Q8ie`H4f0di-Vszzn!FfHBSjXs?eD49TqD1RNTYkylcph#>!I z62^*@S0A#&>veTuv+6f;dkc(${5dyL1EZ4_j6!eF6VXbjenQ|b$I)DOn?%0qVg0AHr^}*NEr=Vag@ytel*Q&D-A*XRZYVE=TEp`r$86 zM1KuQ;;R3byu6Rt{x*Vows@nJ<6}>ZaH5J9yko#ApC%9Z%ckm3T?h;?5H+JD zO1d84;@~J(XdWA<>82gMK0ba&LPqA&Nmaq>s%0eNGdM#^i zt+4)m?5D=7*XtxcY(hTNt;%8cD0k=-;=n_iG396RABsWmeJp$ZqX3B(-b-bq1d?YO zC4)~dAHm92!OANxhU-r)y2)ona&cWA5akmoIsAL;npC@m^}JI!2{&*I5%$ke*P!X= z@Tz<0nOi9HBYjL327EsrFr9{uxW)1Mw}p=IwC=%7BxN5sWa~?TGRri}E`E@L#Z;(T zGTtH&p-Z2{DkvBYw)cmQ4RZ_n!i$?B9iml(kpcAv#70)Mo|;9}6(%k>MmS>2prz}H zIV~#h0P^iDN=|eY`?2)-Dxwq5_E-J^IRA^2G^ntd$lrge!O~g>rtKR(b;M?8XD7$G z4!E%L@Pu}Cea#g5(K5v4Ndz}A0oSxMiI=$zSQQm!ES|(Z*=NZX!BTI5HaMp7LY^7Y)`9B9RWk4W(R%E&A ztw#IAx)2KQ5)kHQf@e4?R)?N=w2^EPkY=KzX^UKR5jgNDC>*k%toQ zI3HlP4wB1e-g9kcZm0FP)4|CW@b!+H&B8h2mo&S3iNo!CtwjDb%W_+yG}Oi3viBqo zoVs1wqi>PJzLn1z~}B{h!yJ*@nVFHENIvwmG%(ep4Ai6N?|>4%%E%!W4Ct2 z6%jC)T9jB6fAB}_nH%}X@$-Nq^aYf(iV3DmX)z`(m63Vz-M!V_FTt3pf}VKSDT-Z2O<{vZ*8dpB9lka~F0xf2 zB@aC%I|S9+=nO74Fdo|L*L^RTBP$iUdALLXhGH5MNLP{8fQbl#ti=F($crG`dT*r; zXJQfVRt9=lk!Ei4$>9HCEL=}=_UEe8Tg+RT zT^w8t;j6#=u>cnS$SvyWCXR%)B_KWX^@(=tVHybxCucn_dZm}MSb{tS&+%xJzA2%g zl9Ny#SPHlAe%0`cO-?3hDG;36PLsHAmkv6!d-CQ)T+TrlNA3fwO#j;KL4Zj~eEt?OR_8lF# z`tf60N4UcB4f-qa?d`1`UstFd9*`~IU9uDnyNUHQ#nA>c{7nz!n3gMcaNOv5q`(cM zivBM*mAS!!0wIVS;I#hU5FNDB^BU|2Sou6dS@nHr$VfO{mT{ESrh?%s8kU-~clN?J z$L1a0YKK+}@2OdR7~BI<`&CwiDo%LtQM`z%ijcHqBR(_WD)e~OSJu|Cw+v6QhBaA} z82Ct}UIYM;)_nY5028P+tV5el{{jQ8_qG3Uki4#O@gngNJpTC`ZAtj;T{;xgauC;|eynyEN+i`V}hS>=xZ) zOmxOVG;W{5g$#aj zVPQf2-xwa$g2!b~u~L4?VE?SjPd|<)Ts}%Kj%CmAL)`Cb_Q}v}us|t>+5NKYH@9 z0@7T1?xw~XEN%<9os0xHz_%u*RbcU^MCwW2NsV?JnV~-^AqT0;qO?`-;YylwIdQ7(hB3DBJAMH1iwHbJN`NCYi|dy|Tsx@Qh)s z`|D$3e^|O8(R<4l3)wVlZPK9o3)pqt)*>+Ko|aX$d9!qc#EGjU*e_2A>3>F6jP3V4`5=84Je;7iv|1Fu{)D$ziN<^tE&2yz?u zb@&1m=>@?i+Ey0(YEfWM$mahBYiztVjsJj{(gID~2$OY$kSte73DK}a2LDTWNJuv#IY);GgHh7;j_>c?U!TA3J-hcg&pqck=U#s3ZJ|c4dq9#408T~< zzgu)e1xZvUW)cM*kYb(Cv2K?`6F;T9d&I^B2t14Vbp(Ch4H*Imbnj1_rZ?POOpGHF zDKOOVB~_OQdN-msFFYqEDKafXVhO?>xVVv_oT7D*y|$a~34Z+Bo1$XZC~{B5 zJ3|di#7EdhT-)u{Rtg5`IS@K0y-(4h4y9cLSURkvsQ-sFM%4nEOQ~Of;m9z%0@DTK z_XRpUOMTpbTB0G@V9nY=Oba58hvbVFLWP3zqY_N+4_jz{L+yK2&L0K8pZaK#X?=|K zmJhj?l)$dtbr zcR#P6qw$Pd#U`t-C-K=mi`F$&_pz??2`n9ZgkMc3s;PGi|H1TtfM2w!P3vbJ+68HJ zO1cn#-GwKQY2tjhPPbDGtpdI#pkI5{mt%IU(%W|%neurQKTad>Ecp}a52YyyOcgz< zDC0PtYHSBzGG@C^U~9x%c?5e|Z^^(jji3L7h$Y;v#YFz>xCO9fe^Iyu_8Cy2&9whn z*3tnA;iJ-p7#r6Vy+J?q%-$aYU%E9xJ;^%mm5%U(Zo52;nhw?% zgf`|6ybbO7`e0inDqIMu5wXieBS|FH?h0Lz%g=e7;??y}(C_k5?4`S)pE`*k<@mW+{qS z-1t8{Nx$2r3Muov{5C%5;yE;tPZM9X@GHMFQBG_k_oC&!!OX}xVillRYPfPablrwG zJTltgc_YKeogVH35B^>54vYH|wH)N;Ze8fW5n7CkZ2yZyoKh%5*vm{8Bok2O{^K1T z3J(P)MBTL+TE8E>Z(QblHbWv)rSpCx=1~-;@uxIXI;74W7JcF4(xMcwaE##x5PK)C zeFeSrzg6_edqpN1s1xxN76c`V4eQtzX$3VOJFlmRiD^N2knhcXbfhaQ)fY^p8ue;E zYQAHjrR^@t%tTCExZEV5e^lENRr+nqNWfJt0&*teezldnZA=%Ma0p zbND&#e!8=EQ9pQ(PZJc`ATgh`rsrOn44|)0=1~JvJ9dkCuA?VIF#^t0z+}}s`i@Vx z2?B!Vh4bPg4jWM91sm-#%cuJpbaZ#V^*?<{s!_(yDuQ(-Fs6aXJ!Rp7qFK+W{ zUKL%}vAP|xE@2a&LeW@gTC@u&Gh$!+QZzGg)GD^^09BF(A`d<>OqsmbY&`dv`!CN 
z&K&n7Zuo>hJJB~I3NX^ePTrsY?y_RNi98z->aFIx6^AkSZjc#X_}9QQ7wK8Nl0qcr${>x!b?5lZ0 z{i{s8r?PUsCE|GhGPe9d%-?IKKg(+mWw>KI`%F{IM3&6fgmc$HRX2H=hky*_RrgX<{)uoeep9t-Ks86NO1Oh3bzd; zUM|u{2fMT-E*NHKLJL<4Vn>dBIK_^+ndpMY7Td47y++8%_uxwIAS z`D5Q!Ti+Wrwzw%wMxgns26KEjcd?$vxwN-we~m;qhJjBYl-G-+j=C{#ed#;_ld-&} zEh#_|J|V6tH0IJu#e)n_D|Rv-r^UR%JodbXs(@(-2)B-YtKTFVgdnv_RQ68JlV3z~ z3Ku1WFkG%_-wm$%gh&dMNk>7EGU(l7IIR*JCCv;UF>r8H{74@-=250Ughsw3?=abfOI#t!@oy>4d>}8LX;?sE{gb zMCn*4Sx{wD@0Ea@ z`oDDI5E^a140EFh=sKt#+Lq=8*to5zM9iKY{4n_0y-G^-Exfe+dC{eFNTwQ>E>$E> zo%oEq&B5ajcK+L?z<}c7z1@Shbb3R0K)gRYk!{J1&XkZ4H2RRm{> zeCwMG1SbCx5%Xfenb)u{Sc@+;f67Idw>|4fy-T<{pQhgVs3>mdV)GJ}Ou2OL|CfdJ z5zlg!1LA#>3`3S12>vK)oKS69ls%cmW9TunoOVu*t*ckOufVe+J%1&vXk>XQan>Fw ze=eiyvcn*r^+7Ec?msnBGo5%FHTDUMBRw*j5kAlVRZn;_5VveDH$KMB=8w}7gqv&~ z(Z`nS@V3)i7EjPo5%Lj#4=Ur~Be21h;u^>AIR9`Me&&6tYN^KP9|Ln^j%F-$&$!y+ z>Jp5+ik;PEH_P+eufCjI~XM(=o_mlylNa?Sb)MV>WNJQuSP;a z?&-Qw(JH6^Qq{k|yM{O)Nsm6&I`+KIk=!2dA)?DM-Peo{8 zV6s@={xeSMiC>l(^xn*oDo09%(Bt-$U{9`WLMpW)+39#bU~w7?oSrR{?#sfl@#|68 z{;wCY>+~_(EDobVG04U2r9l<>5+@@%)zVZ`18&Ym>s#tnfi8&>hi0Fe3V2Fs7#MuI zJJ54XW$hsj%gz|Y^3o5TN%s*K1O&0dokYDaCDY~4GPV_H40L(Y;)rh^8(MmLQYjhm z>{iRsixhOH5^&TO@c4%v1%|$%U^;X^=8G*Ck%qw+uYM$EN_tH*LtSXi+W*~sP)1gy z7pgxgLt0sTil$cy>q>T$!m(~wrPt-tHuVnX=uqBi&aP^*=#6C62<3u(GvkE({r!2c z{AF)UYs=7t!|bq|UZtTF?0|y@xc=)$m4&@1LASLC!wfEo^@Pr_5sfQGO)kOmV{EAS z4D6X@mq%4Vyzn71rbny%78kN@7X|GYfF8rjkd32ngraC9;!$IQ7>(GZe&wdtQo8$- z(5S$cmpD*CBl}v@LO!al)S-7qg_2vlc)LHcp&MkddJI{`FgMsP%5_4VP97G9>zc+F zPGUUZrfLm?_t(_uf~lsBjD#yV2nq@ciTbKdf!c7x#((4D>gw7x)-Oj}_o=V{xn0ND zipD!-K|&_Ia#cCP5Ex^4Bm(o{0~2@h@aJ;+*nFN6?x=<43&(dfs>{gy^8U3%94&r< z)1KG^4m)I)$(j0K&7B8(tPSWLiG~qqi|=+N{Lkc?6JHuIvXiFRDMhs#_4_xtn)G=P z4q#$yafleL@ti?0fDmt8}t-&*U`+&WHOsdt2jIw5@_wBG&P7r<$xp zisrTn!s&I%otKiDH_r^9;JFq4jc+n?OCJTP$&6e9 zE@8&b8I^jG7UlunlasOyYJ@~YeZD7e+WRDe_arikQLxxnA-#}S4Azn{#)$WdY-`i- z%;sxSAE>$2-&xxAsEt2rM**R4(h++2ueL~Deh6s6zvTRU4E3JIv-%YC7gsAf%hFU@ zPu;vkbmV~FMMb%$njTtoc68j2E!Pv+ZikEIKkC-&>t~R8Y43=;N-WZT8WGq2z4x+{ zZQM9)rtOsadadt9K_K~Wg%S{VpS}f@!j{STG~$&(ntj)w(UGCK?+2_tZsx`MvmALE zsr0mV%LC=Ld!ASRt;^7*UA)K6=^T>(9gW?MNbq&mAIn`5w`L~bg zrX8IbFRfg;k=9KoaNudehXnlUdV|>`rLX160#6kD9mBTwc=g$T{Ps^6LKZiA>~1tB z4E3MB3B1Y<9zM~^!M={(2HD@$$tp%oZE}?3)BM4DtWHogd0g4hfS4Y{al_I`&H_X0 z&3+85Z>WvH)4z((nR-vO5}EaA|C>GKG|LP8dJFoX7_VW;>%8@Fe5#dMDC0wgiMlpZ zO~>-VbNGn+^wvdT3D?+?q|lJMI)Fi5F2RcSqk7Rb;d*QswRS2U1GkLyOYjqoFUI!v zfAHil&cZ6W3 z?v$tH?(uA!4~f)vi^|J$yH#dpB`9!V%K@>4{6Zf@S@Q7r8(fOG;@`%fE9nI!j0_Aa z@R09{O&v}U@x3J5{PIvdODJ(;<7}_}@okOFsz>Ugw2m%PkNYIyG& za}A2<`ZsA%j4-$L-x7_i1eg5-c(wE>e;J*aI{IT+Q-^YF!WuybXOz&eH_);fR#7{JbFf?%aW>tO8B0PcF5~h zKbs|bjd`C%v7)m3DXTPfskZ&&%ig4CG<(NHPe{1SCi;qY8Gh+jYwJucBgW+& znaC^Qi{jM-Ns`(jywj{u4=VCstVcdOy4TbMuDL%NtFq491thOWk?^1e62n72Pa00A)EZx*|C}sJF#|Ut^l$Ylzpp? 
zjTNjZb8*n^Nfs2bVB5Ul>97}fDnYQ<#Hs)Up-Aw)QHK?hwInv$vR3LFubyJkH$sEA z1HTe!X&Fo*7&*GM#26al&NXVf)-rYD+Hbz@y1T3}gtI&RWTPMYiZN^RPcxNpvzl=g z?dm&QIdc;h7Jn{I&q)(U8Lupp^Z~c)Al_*alrOmrE%%|(DfLBJw870ahvn*vPE072(pp^Bg{q z!cq``_yvy4m0Jfsxz8l_7vJ4%))LL^?k9VDaJeh?#49!UA~D#`%Zrdd&;DSpCGZFx z{2Q+TP-JIf3|9g(?KbvShf|rj;tfPWPEw!D2<=&((;z0eGMNf>%J;;L?Dv1SI+y9A zTFC{x0?oi# z!v5|Kt1e;HB0Y}OmkfPh^!cpNPf;HSn)b!nxkrW1B2R=hY6(=t8if*k!8mHl{dBLZ z<6FgOhSdJ$U5}Hd>wzji&zq-B^(D4MDvk{d@H{y?Xi!akTU#5-G5PD0oQtDk?#}H% zz%#dOVN-=~bZ>91Ng;n#Ugh7{noB10Qb|{pD;1#6N6pa3nx>YlRU$$bu2-g5rr&u8 z=HlX#488{2*w`?ZIGPw5y?4#B-|(@r1UdpiW#xQ5m0%>od<|PUBrrUZq*{!7?QD8R z!tbANkLl&nl=XbWp?BPw+I;5QE0<$ufb!!I>w4{sd6Gy}zPM_$D(|Ln(Q%nCr!jHI2^KoG%;Ay@(C(wO+Rzx(DZ2fI!pj_grWR4^(%xA$bTAy!< zmllLc#80x4pffMgD4Afaj@6tp{3PVQI!K6{{FxkbuwzMB5CoS1f^%|Rt12qivNazY zxk^(w-`lmiZ@+;Lh{Y}Zuj1fu%8jbLyg0^8hd+#1m%ipQuTo*76d1C$u?aui7)u?n zmDbrNv^y2ntWE76uocavS4;lx^yJap>A#t|wrdf7?TwmzkqXDy_!0aQ`%k`yHy64! zuivaa()9Ua=7VjPW0~=VSJZ4`i%Fwrzj5iQ;rK3X=x&G*_=JALd`xZwiT3O7+W!3jWJ zJ45grO4C-e@FOlbbWmkW%^`-H&B&&y74_TKLJ*>4s^b$oeoSIwVrFfDnYi{tBU}qo z0S{3AI!@mCF*t9+-XU1GOdr#vjE3+`V^MJ7BeAqIR-T#lPM;>&*9c9rS zL z+#CtuZv?=EG*ow5QSCOyps!p-s+r>l-Qaq#2G7r>ciV5|M(k5FGwBd!O`XxqLH&b+ zYNudOaN^Rv13~KBHcg;-35_^e*H_6>6B! z>bvUnWLN<`K!mK%zF)Tdjx6RPA|Dyo_3xOZJ6EA;@rBy06iCviO5*?fY4zmukjaKn z2jFS^(!RiwrX`Pi0^wz$4rho3KcJ8{AH9WV+tT-}tyFA|{4M?(F{uKq@i9#P7XD)y zb$RT+HX6)*j|s}A`|QX879?h$e;wvgx>p>9h>)q4Q1l9X6TxE&;lvK%2zq$PEev>ibIines>A(OnPwwh%*PYkxd@^w*)Ah_)#Ycdfmy1 znOsgsulCO_H$Q0B_kVu2aHNH&pXjg?wfw=GHVey?S75Zj6tqBLMuG2-l^a{+5B%@g zAiIG~F*(qoUNpA7DrJf)|8|$`BJ}+813{rGp~402*;e$sB92>K=v+$G3#58%Ki$Qt z`Va4~)^6pFnXP;ymbmU;<6QNs_eqmLHK>&B2Tv$3rxG&>UrWcUc}_B;S*IRypPX literal 0 HcmV?d00001 diff --git a/calibre-plugin/images/icon.xcf b/calibre-plugin/images/icon.xcf new file mode 100644 index 0000000000000000000000000000000000000000..76d7c0c9aeb81d6cf0b6df17f8bc7e6c60a59e00 GIT binary patch literal 63927 zcmeFa2Y4J+mOolm9i*08om*;Use^LPIY-M;&IXKe#9(j+GXvN#3^44@5FCJE2WED5 zC+*C#gAKMZ#tFuFzzAW2aR3KdmgFcWrS7WtJGZ(OY?RzkMrz<#Xz$y7!)Q z&%Gy9^`gZOEQ?t)Z(+>B#Y+}&9LJB9FQ6P}qy!%SnQ{47ZQ(e&@MpoL#Fd0A*o#v+ zE6PUz8Wy41)Wr|nvuHu;q7{qhBZ)&k&n2u}^~jP1F)NoYSv)^6AYtL6c}tfpT@jPO zKy7N? zIjiLHTt~|uN%Vw!cu_nBM&qUaUP%%qE(YnGWR?U@A$hjW#Qnf;-~9dWejoUI;dh=t z?xuRrK2E7|iDgxly2}2tg4F0BPQiIpnt+IaK7X#Re(t>sbqlX7m_K*UT~nGWOT)CG zkrAO0J&|D+ZtWwl|K?wWf6@N>+Q0txcfWh>Co85E`dLD3A%g8%u%3Hp`G(hCef8)6 z^t$GC{ukH&>D5%p6dyypZg5h zjK}1=16Yy4{%m?|UKz*x^St0us5yc2VxoQZ9(=K2T{WV6-fPaAAeA9$eO^?Z9Hko;gFoG1UO|#W%!o*OOu(K&4WDykpY#MZAoF~dZ_JtP9uwu}k)t@Vlk zxyVpIo>@MwzDeD5b?o>_Q}0^1{OOG(PzG1SODk`pKr(e@PPR z=P1Vqibe}RT;c^jfBl6Q*KgRk@s*$d;(2OGr?=X8CNo!iW#$|HZ-}q|g8xO&&tHMi z-0;E=Q0$7y4AC1IX67FE$=w@}yTASUGfOAs(t}i@pFi*4>!;!xWT}0fr1qVYn=12z zC||8q^{F|Zs4vY+iS)-}=eLR5-&MTZzxzK1cZPreoOXt1XucyP1??SMCgF@C22yd~ z-V(}P?d$FB>+8k;{r!FY#(qy<54H|YyxrDxLf)EsZrz&Ro=#iUzi->B-|F7B=Mp{r z=|?F^b$U&pHb`sGUJud+ zY6CSIe^rn^Oy6hF1q7$BaqjKdnlZg_yIgt2C~3uv){~%N3+|SD z4_mRlSg}eKTavey@UDC}6`)0ZQ;FdcWB@G5*0tI%K+6xw9Y_LAK#-o-_shb`;xG7- zyrCZJuHj^3F%62d9-lZP7DulXDqhJ8W(dH35`Y>uN~L%y3o45hVr-qPN6g@c@-vtb}@I{@~58)eX8e~HItWdL3)GUsK0K|>w>fa z{?YRvUTs~?Kg_S``@sWEi#Y#4YoHL&uknwVyLy#nm1pJhrTo&qIAC=_&Juz`Ev+GJk*2H^Ptz0$O7>aRPu7`)LM?s>?)$;23!%MjRR3W5G@Cuo3 zR|^{!+>|ds+V?6{ew?6C2qxhQSJrrQ5d}$nf!OM5-7WRi#N0e-n#iubC0hm2`}aw{ z_0hz=A}<)x>8e{gO_^r?3RgTz4y1lyFn6`;mdRo*nk+`k6)t(q!kcDF1rGKIS^38N zD_Lc;mW-0?RxVh!)U@>4vK1?bkqJbgs&3`t)hmrF`Bm3fu2?ZdL|?9A(Te4!<@^eM zW$&$->gA6=W_;}Gqia@;R#kb&g87E|SLQ8Pviz3kVuo@GjfGcoDrVk)OOLU>^Gb>h z#hAZQ2xsOzfm&<=SIg>dW-3P2%2Dco*wv#X<4k}9{cRaxG}7LNZH&~CZS|4!(a&R@@T0$A96jUCypGMH! 
zItfFl*i?)3^XkA_BbWzv-8Pp*4% z^&}`sO#1bDr5{y^PplX_W^7e_&PbVQ+MNd5iOwg9sXcs!!SOMM-$Sqk^|02}B(fB37f)n8ve zeDu`CUN`FX^ouzw|L|+`uLs`vwdjmNHX2bHjnRzi=HyN3(EZ9Wd8qt^2ZkkK5QaO z8k7TE=uN|A13`o4J;k|fhssQ+7LYuoca1F&03+NnWz46m0{Ju(slbU-`W5Q zDC2&2?X2PKKzrwP_h<#&s-&HgRpP~7i7^$Co;@6ORf%s)eWt$QJU1-ctxkLYb~TmG zO-DX8d^+&S=UsiHJ#|1ZHLqE2SU#}iu@5fYS^#?}Pm*GQ3%`kveDkGWKa#&#I;PpH zjD?Iw1si7~0*%Hfw_}9ASNea(P@{68d}eapmV15tf8S^&2EH4yjsnMS8NJlgu>A%I zJy=HyQ^lJXMk_??ThH|;+`suUTmLtAcHra-;U=5fb~Q9S+L=){W5a< zq-UV7*ALr3zgv<#Bq+sR6uWGF^$gq$ov#lG+a7ViEx9kBeur8K{#7Tk&;2IYE2{fE zecx_=vX-)pkL~#4voGI&!k8`#`GA(IpS2wPXy4X zK`U-7e{j+4=IZQ7KQeFg{#qTc>(l!43T!YsojWK%$#$4RdFLtg?L^_UIpH4+(L8UW zO-TiTC$58g2LHjX^kZP2CndTkQ7HN$aUhhOMgrFhUk}_kgnI74Udr8w;dp`L9$(Z% zMU6=Bf9=KlveN0%o7^uvhHUOy3dg;)>6M=@uHzQrwr|7wjq8892u08HuWekvantHq z9GAlX30kX~X(gf0Bxa%>ue%(=+q$;S5vP{S0o8=byTde~4 z4I5sh^n5hCal_A8nWhH_|IwqAKJVcTn>MX~_+G+)_!M>Z{>i|%{RDoWP3vdF6(>CV zGDfxm(__zn66nSa^Eqzm;%8poxbde?tmL@qPra~l4X-?v0{80U zsPfVaKgLA%zKFSd|X2Fzw*+f^KeqR{L(8MpP5<8g-_zRO`D!w zfh4%lUVCIdcfFY7-W7Pd1OaeL7THd+KuW^I30c z{&d@3LLP{ALB4TRYV1V#-KA~LHG z>BT3BGoB_^S@&OJ6+DZvieG@tDr)L4+EZ{)QOR-w3yDe)h+i}?{7|kNbzq?A_h*(B zOb~1puqmPcnj8MGpriB5g5mLpd4cPJ4FcwKf1-J}O2(lX<1|24j&6THIK`DpKiJVV z8j_biJj4Bos69wS4fF#~@#i&ef6y95n5slIfkW#;AsHtf3%DPp6Ha%8^YmQ8AzXn9 zFv#2%?e&%AA=iOxU?W0LyiK!54#$T-UhWoVz}H)-MLc(v+Q1;Njs()+dI=?q8m~Dk zj~+8Y1!z)OKiMpMhRq>*pdm#OJR{FFjfta5co?J&5IreZQ?nBR#JYKJeE& z2>;PV;GXuSgx`M%xafHtxO@5zk3z8c^@5D9&*AL3;MpF#>1%?RiX4ujghZI$hjZLx zm_t$e>s0jaC??G<{aXl@Bc(})S7Kh_H4~-&m$C_kqUrvoi4jARjz1humLXp$-vBO03gog`|8-o3Wa3gJGnXudbB+<$|Bm*5IBj`wG7nD18B@il z2pUeoD`51uC<7E4r9bYVu_$<@LS?3-mf8=yCsY{dkK}jq!_LiX3;}@}A;1rYHG#h> z&|wXGQ=nEG1ieV7SLjqhf{xb<2Dn90qMJ(i`3GoqdZWoq5mQ0If>p2xh@#=Gd@vuP zwvlkTfO0TtYIR0)uq`Y+A~Gr}S{p4yiIEX}1Ru^v^hHKRN5{m(s$#LD^5hs#;ycSG%h! 
zE6RoP-ioT~n!5V>2BATyr%Ii0nfnFk%%Rb)l&pfXs=CIeapNaUm?%tCOyZjrle?QI zO`I@c{P=NW$BmmXaZ)qi%unJciW5jaqcV;UT4QjeGd>|cB{R36sHCi{yu3oFP*n0& zs_JW171d35PASjN&Cf633xs?=Ps~NlE|}WTRXjB&!4+w=hJ=PiVE)umifAE58QT*T zSF&hEqK%@4#9*reQA!H#3e`bRE#dZLy9p+=UQOcC*`YKBK9K81E`kE|)(B^c3tP29 zFe*e+Wv_GE?8onmrYS}I%1|*!ij%zPycxXI!x>7RUFibj;15Pw1uMrM7G_Vv!&o^m z71yULd+`Txpgwm)5oiszn9U|qfNUl5)_lrdFuETHi2c?1afP z?pin}L&sXE`llZyP+K=lWouU7H~+2~Q&e(dkAXeXZS93#UiLU{yJ}|x|Bs?nC8JB<+k&>F4h6RvcQZ;7E-S;oe zgAtYM8*0E;_Di)M}~%}TVh zBL`K-3Wmg!cvOtT8OJ0_YDRWJc|-H; z`yTYp+_2hrPpr(2j|?U~iJpy~YYi4#M2sUoDJ3mE15ziipro>X!p!;0mKVr`tNry) zygkpodqPE4T!e)UoEN^sya(#dArY}IFi%c?VNo&or>b_$#2It$d-$QkTdL0aWGAn4$(FCXhguq^HP>^1y zhJ4`qjP<`ArK(S;nZsfelMV1%@_a96Y#6O5i7uZ=N`cT(2wH4=Oel?BfwirMXh+*g z`tNO<=g-R{utmouhG=NxGnvhZ#iRYMC&U&S9u=X(;-U#0eFl#Eq?a?n5$#G$PRpSE zDHr<_Ca`Wy^H|sSP9Ui7-G$^0qG_+PnkCJ&bjw4d*tbLIcM?8$JUj})E~w?KI#uLen5~B8>b^aIXx?f_SNE&@~ZlAQ)e$; zy=qj>Wvu6|tyDqaVBLgA+aa+MlT%VL{=8y{t2s-S=F1CyXwl{R_#S#b|DC+xwTR7z zxFBgUC~U^gSm0alkX1LYIXdQ?KxHOuKro9nI0Tdmi->k4W)#*;x_d#kPtJKIwIR>9 z+F?E5Pf}fI@rbh0>GcL9&N^0Gq&+FSeC(__nes+6DC7-;Vn&0i*Re^mLlkQB0eoP$ zR%b9{?W7eoOrP!DqpE%T!;pLnKvns$Mm8kgkN3Y4pw*j0W0Ui1r`(w?KXS0%kXQ}$ z9{B_Wy<8yy6&#h2Q`J1nSK|%u_Qm`LQG|eh|8{=I3o?=yKLw4l`dII zyGDK{%uEz>eub8>Ioy?5K4E&QT(-)$OPb17ExvP7Lq##d1pS7WEgL`8Tek8S-W}Cc z@yN3IvznV~5$ntIR}?CLttr%zUfMJzg|(`y`1y}yT4L{g_`wBtO>g#=RQd%PL+xqB zjgykJ)K^{k%TM_B`A1gYKmV?oQ-+na#ikV2k580KmcI0;e5TMQPnkTfv9_v0E-9c* zLr8Q|UiFxGvV!*o75&4*E;@bmD77{l1#_F%ayK}Q6vb(wR~e)b&Lmkm3v{hu$;gFB z1};?6abZ#r7Y_80(c#8wG(k za|*=3QFz)7KeAAx5UB~+AB=*5*LUlJ5JyP%SY)vroLX!`$c135)nYOlbV0}*%!9~^ z{*W4YKw?X%qg#LtS!&aNQey|mT%xXctIPUT-w-FU6hq%M_wR$NUiHQgiPRYqYeyB zt8S_-$xgC|S@eN0yHhTrScHBU*Pt~C+&RG*S24D(Br7pC)T|5eJ{vZ7<4Hr1KlyV6 z*nc@brhGzOab|pUh*1lxIW?o=#IjPp^khj8A2bG z1>+k_Gvgx%n^ST}t18ONE350rHqV$nXIcsjyC`QXnp9JeVh=U>dZ*wn73AjRN1^HYq2RO%#Damk#n}akoEhy)S3=QE!u7pO!#HHqy zkC`@inw1RLm-S5(DsvK|2A>Q|b%@!>8^1GIL!(@&`Bf9{n4LzO2B)lQt}951wHX3o zdnXET7tHz~SVi<^TU1;|al_QR#)MIm%Iu~wCFzbZ@2HXFHfRH2S2=;xerRk`F50}y zNlhx8HRH>n5txG*^{7dGAl8x}j^@?~S4PR084FUWNg=p&QdLeuq*cf0f~51HkP3$i zoWilQ71U0dn@g^bOMGDd@@iOam9sJSLDG0IEw z5zJv?%rSXRHkl4C3*qJ9_V`H4ARBr4P2=OY>GP6^GZYE6W6LsKSU-b|2^NN;=H=kZ z33tqlWd-xcH5P-8L%i-?6b3Accr1!3vuZ-9psJ?1HlKKHX!T&78H{Y1rR7(TpLHh* z7cj7)X*^cskSxH8WNY_oXhba5ZpD~s_cR-*$$+@=jV0-{$mLFwFo=ncinhBFnJ~C( z2F!dY7hHa`&^Vfs2wP%Gx-484R2Z3XiJdgIJc|i6pAfp7pOc-Pn_moJHRaBG#@RvZ zONN|gm>;mZ<)<>wePuaceyyUqu4&SY*>^WP$=P^GNUCcp&rXO6rV*12B2fyf;P^>X zX52M*MunZU8IIRiPOK?FbC6W>Q&FsW5=3)z^ThG>>7npHK``sGn;IZ+S!1%CS8`%` z8DDm?q_`k6A(F&5ifQ7S##H#^H(P+Pk(f`IjJg15zVe>omsm5dDj)U}>)_U-f^|^# z5=7t>c=HvB(;!_@IJHLO^h#F@PP4H%)jDv>b>YODh}VUaIkU8uU-y4;TRh(ZwL3s> zjf~GKuXT}B*A+auOg3b?6-tmv5l%`7Q|`^-&a}ev7-GsBff)}kieffVr)(3271tJ% zlv5Z^zUN*|!jd~|%(Cj_&JI~tQ?p~uK?Z$j);-e_e8yR_jN@H5))O*-M#rZnx?ljv zET7g~2D@*8P{8L4WcH<=oFxg%%rx;SP>^A69@Eq`mJG{^NdshOW~EM(PRtD1F=R=~ zBeQZvWi>SV`i6#jzMij(^tQzeg`ttLPG+tmVO#urg|o6H(HtBa5k&@B7j*Na zWU?pnsX_|m5_&Bw&^m+35)3OeGvj0bjaA1YhDo;B!t-m5b>4Im&UkFfu z>v(V-Ph3YD6_=21TZok7=kV zNRN-9Bc=D$39E>OH=nTt+am0VITiH{HKn;}anYeTm-^#{BnSwVQ>=K{x{&CAV^kM;Jbun;qj^?u&w@$Pv_GN0T*YJtlU8yyiEY&Hz31$Z@vkL6{Z zB7C?`0YydDDWHv?b{LM{op z&TxSL*B^>_5OooAh7gdUwtOFoh(Vh7uV#f8@%0pasCka#7WRxxulWtfH6t3~E~L-B zgYwqGX%rw?xFjsAG#EcJu*@=USs84)@}|suXyioGLoO;~1-sy&ayUwm{Tgn1d|kpOyIfuH^Tj795)6q# zk}{c$^SsbOdo+3JEg|H0cf~^shdVJngU?WAYO@sC${cdMwfjy2C>I`uBXuKWxRl(bhWcjuqb;xECH~|RFX5kp%IQQoPwC&olF>tX7?nVh4}G`apW3nle?zHvm#>c z_81&?5D$ghn}{LolUm z)7M28_ik zm$2%*a0KDKjDLf}qFDV*czWrajYHA6=IOH+ELr7ydijP&0*@dG znQ&N5VN7wrTB4z|M^ndr+4^_^-d&;oN 
zv{WE{oNzGE`r?2|?&6Qbz70E6va=XJIVAJyB{S=6!76N){1JcT_^7iyJcI?BN^>v*Jk{v|m zGUgMGjJ2EnSg;wqtZYxDZ7(!DR7)c5=BEr**1$Cy5*}lZ2&HH-ii1f_#~LF;e%;u5 zJ9DQGn{nThdEpqXY?9lUwYcQ;Y*{ijj-5Dp2At_jA6oZ=Vit)poYo*0?azqbp=o1J zmR;c#0#sR3KW6;o86*;Z_(NaaLG~t6`|6S}nl@U91gvR@B*-+jG-ljA>e3({gFc4o zK0dUh!6aZaCQu4WDiC}zW6t8`-k=|>Nv}B26LZt4gW`SUbAZtjj6emp#baIO6qHtv zX`VH2$^AJXD|u6WVoXnqN#Ivg4GOrSsZLDI%qy)KJ7xBwd$Vt@N{g9X;Oqc^ z$0E=NwgWTvpD;Yy5tp2iS5`M+=Dhi`2YpENLWbPIHbh=HK@at#GZYR<*pE>cB0M>> zuoB@-bJArmaoI~=nQh2hG#eF(Cr?sExh9>jOk~)A2{EU%ZX%5Oa=p?Qe<*JihJyR( zi%s%@u8mcsxD?i3a`Y!=7FLg&F-vw|>q=hm?l6Xe2NpmFrL$oho%ZN~bl#(Qs<7y| zG}tSr&VW0F_SKS~dABvTG0htp-eff5BL^&>IjYH?!$bE| zhx1{4C~rF-5^No`4-L7p^_<{P*mz#*Von7*1)hed&mbz`}~s!rY9M zcze{yvXJT~d}PsZ5`kFZ_LRJ`%8C+L|6tx@a#R$4*~VvS`_ zxec8t%u045E#3qVNyq4~3CO`AM%{MfNgO^x-{MR8V`>CnC^y1WiC0Eza9;935cy@c^DP71mW2W+aXXWKeRQ_`qLHSz#{1 z=X3H3OA+1IJY#Y``Pa|;N7dApQB*~0GR|IEd5D7@KV?=E zoNK6|t!SvoC(APR36^E5q2M~EjHxWlPECx5B?3BKZgExP#A#FFpfPjj6$On|MVX1Q z6n)_{7Nf?b`m)@#L}zSFOe{M-;N&rRN`;lxNUB0qMY4lv;j?|C#`qcpVmM#^9 zI85%K5naLcj497eaYcs(BL?(JFb=yoEmSs6nvq8Cq4T^Zy$Lq?1Q^~2&FTv7Y-3qY zGV!_zp@c9C;H*$qKVfWaxhAxBRj^c}~f{EEg&^$CA#jfTYGy}J1sgX0+NRbz8~Jk@ZG|4+u@9XXAo#zi&a z{tL4kPP1FpG`W&OCN3n7t0_j1yEp!atq{!atrRQQ*LYXrAa9|@#5#DF60mloar95h zfY#8|++1X)8pOp=s4#|Kr`m&+mr`@o<` z>6|{kwk$s@Ed`EC=-$PZ_2VYbs1KrN_{Y>h^d{dVwL0&ZJf^y|fW#cz@5nMSJ_A-W z?z}0tmPGGOa&e>}t!`+VG;LPCjtSC;I+DCzDMuSHlZ)r(-7#f6$!dy_s%;$KJZ)NK z1bF0JP)cJZWD^ruv^O!SsKmFW(r}DVW$*8W;K**n(Ve{VK2Q8`NM3p0b+dP#JoI*` z2jr!9LRE-^_cH;W&m?$0)8O^Yg4c5`zx)3O&y!D)=b@fK&%#S$5Nnx5H4~cUaNwY# z=e+hxW68>;m=1h)71mi^d5J9H2=9V`DySSgA%TSiYYU!PDO)#9Yn$YGvwnR4C2DrwI z!BhpKZnPgS4 z{4-=bAPZA)cx-%HW|T>*(*>u_oZz$?WkWbUgcr`Rr|7XmITBOj9get!wBkwQi?g$` zv$OatF%x}}{SjW1QuN)_i#cRjB&&R;*a8V9&0pJ}y`BepFJ@#)_ zfZ1PQsDnu$0d`H8CSjB=DOHqG0XRL#x+A~c6oI!5WMdkf0erTQLqQ3$AA^){W)6wO zsRB+B#62V>3&~VK_OGz_8;AKr3=I~EMbby^l!4AF-`V0&?G zSpjzUC>v=hEY=fURC9jdJr<=*hx!h|=qjv2L$cW zvEihxu{dkm6Ro5a2HN8CDytAOk`foq@04f3N59gyaXCZl54Q2R}Iy0N1Db!3zoI!+S=->iqfLIY{a7= zN}S@zNtI-i!b2X{9UoGyON)zeILJszba_KlhV_gMg7SQK+0bI1hHmXQvvJ{dLuM{E=nduZzjt@M?I4S);^Z3s{#gMytI z6c8y-b^^JFm_Ha4WXr_am}pog!pSm$U`n(z!ZIP7H@a-FZXo={fM8`9NNAL@4MESz z=8+3ba~@_Mfpozav0+#d;H~x5+ zfxws{qmR!FBpZbcJfsC=6Jk#AQ4UF_u_|m9&mEp3e;cI z&2f)laBF{rUv+tv$ofP2;#%|AcENu7hX?pL}@@-|IkWWe+FD*wywt z&fj~9h3r;b;7~jAxZ414k?#TXBqKSK^Wb&607p9Kj_q_!)`=V z9_dX6Q8L$Qa3&++DK9+gFgTK_hC#RF z*}K!UlVvy9=@9#3~N+w|3g?_!i%d{(zfO!{)`2H@-u6Tf*)}3(#f_JhhJdf$S zC|}~Gi)x@^_XYKZ+k*3I)DXMB51dnCP^F7y8=KW$MBl;u1T%ECkMVS~_VOFLat2>|qd}&NZEDB>V*RxrVciZ7dwm{ zG>ttBqGV@>p@XLJiWd&x^P6lMFPzn%rD?puAWC+e)t$vOUSaoQTf3nh)7aK-Y)9AH z+D+}4#qV=XoZJ7- zJ(5PkWF|d-K?;=cflSTpefXMf66a2N>7rzSis+g0A%5Elz5c0Wq1$+o-Fl{U-Z9>x@{^aLL*T|`_-rY-ahVj1@N?c#f=>(D?GEf35Iurjba45cpTo|Hc0@jE9P~3( z4tp#DJ_8-F%{bgHI}fvX5d7C}KGp%$6K{7Y9UfN#tj&5yAiL)Q(VU;%~5%2_5dp6vODc!EJ|uPG0vV%Al8|U zcLYScGlzf+0N*)IcIE=)2g86%k=5Gt; zZ}XPTZ_)g1iRP4BJe#-B^licPNpBPCd3W;`B%=jP-xf^YyKisd0mUtw{Wg2HyoKpA zY!llAS^Tmo+~kiS)p-fzXsZN=!`!HC~p%k6#_v$tKbt$6Ul{WitU zV)@zuxTAO{yWT6_p?D9qb`-yd`}fLsD0U-d*Wk4qdAm@PU6{M>t2B4rSFT*8x$C|n zToJF*+;wB_F4Ej}W9}|?U%o_h*A3q5c3;9LSTT9sn7kWTyJ_;eFL$fD#Va&--I%-X zE8RSx*nQdWvggu8V!ex(b(h6%n!XzsE-No%YNX47m-{c^cjYc#Hc%Gk?<&e;{w}+_ zxf_=QsL-X$PUN8^Chp?pYyy`qW?#hoUAve=-~s^VPP&*2kWc6Zpz8Agn8J&g!V8$H z^SI6_x{3!cOhT8Uvv}~reW#+Mc<{n~hvIDU;D!6MiuU5c3-|4cwqkZ+5|=$slekQh zo~KD%hEJb~&tnpSDG7_^L_MYww`#hL<_0o*evyF*MT$SAL~-I9^liD_Qz+IBsR5Kq zzi3wWFEqcF8WzaSA8R?kSk6@gYNHEt$~>8d!FN~f7dJdQTl$#DS7_m zC^~~_&y=zW{0hJyFBtYQhzIAu?e0;xGYI*ai(6F8mDDxjTxo4!K9uJQ&J*qAN+9LL 
zILwm(VFwAvzJpFOr-;LH$0bH$@|5->PU%Eu0Rabq%2|j@jKzGa?FG1ynMc57*Fr3D z@$A4xhc>HS%*L(NAx^z5u*-Sh2OQ!Qx%z0p4V4EUmD4zMFrA3WoE@mq?m=PR)`l}1zJd=R3tLg;dxua^E6JY!(EDBHk47%MhT{>D2F@c zk{6kd3K3}A>J;yRi3$&K(*HJmjmF^&=bX+k&gBf{;+!@_F@SP!Z6?ZX5#J}uZ2{#r z^=y5PNVgRN7^K?@(rpztgLD9I5e2uRb{^-qHRx?| zE0Jz1jy|}oTgCTi(%%vH-xjBDvYnyGSdoN}i zblYaz;rW(PXlKX{_s+gS^b478J3`*`Tw%o85&WL}V@9`_9n=m;^`3QydoPk?l-dE+ zcDUb1D3Xzw(SDxc7b%ezzDm-ccO#4B6fPo2k8s) zb&EHMeBB`5W&HFd$VUH}oR{;y51bP(Q}xk-yX)KqqTe}Pm%H75U9IVCX}A77l9CNEzBXqFT7jo zgy+f$50(?2EI^Ml(P;y8OMgjqh9FUpJivmH1ni?!ht=WqVit$Xi01Fju3VP|D#n2H*@^_0(FIqumhzlPFl0`gA2uuxOOQ;`C2PffcIbz2-)c*VB6^+5V{j&0_xc-C%L6dAM6d;%g6*Q zcrPQ<#eMHt-(zIDxNnbT4)?m#4;itJ17XA>gc0i~5KQw|BfvNEofmxhf%*d!`SK(6N4J2(a^m6r>ix(Y4fqfE z2t`mPR%jcRQX7_18)yXRX*=C!1BCJHOj`&Nu?T<#BMFP3_jK@SA`uYlX`&Di%V{DI z5c6rG4-nI7A`cMbX-1vi)2H;O7)hxG$YNt_KRDh*#T> z2OTG(czQdJ2OeiMabG-n-2XVEiFozYG4(Mme;Jo`PMUC#Sna8!L1 zt+bB<{5`kQ1PVt8eDeS{xak2x;j#BbTn7Y!s`^GxzK&!kvah2u6ez%8m_t03*I^dH zECDjJ4Imk2=$4*O9TbURqB{N{B3y=`Hj&Pe;(&*dkKw8RA%)ka!2@Rc&-2^mmiBg*bU z0}2^Zpv#WPFTW`lwOy#`y+sQlolkVT^ndx*o`DBHYY#QjqPQ_kWLg;!A*Pi0N-ivG^dteJ0yl}r8c9FrW ze3xP$tRmhE!`z3OgBR}iC_Y4O?}Z_MSpET?_YtoBssT^$f&F>=G2(Az&<%irf4V<+ zKMnjWgAnwee4O(!8~9-$e#q%&5W{QzIQwI0)>69{_A!WzqX(1+JfD4XAn5>&`_cja zK>uHlUY5Q*;5s1w^?-1I>=5{VK%3%p@!*B-o>rVH9=tHvQ;L(tgID=U#fjp<3#Cse zju#JJxIeBqRy=sE71kZYFpsJ{p8jJ;^NwPe?ML#C09*&aFuyyJdxVC0kwN^(&Cw${ zNI>V$0HI-CXAlYB9?3pJ!@R(tTN?Q0NEQ;P=wany&+#LNlMd53uN@W+51hFsojmM3 z?D_f|+^r2-C#j4cN1v1@;RFA_mq85!9{!kYdRW5%*(Y*frx5=rgIWeW!nd*!;}L*( zX9$=F6K(|_>cxYB+q`%P z7wg4ve)VFZT(lR%@!nzawgz>K^_(RZ(d>-aE1s2kWM|lWp1p*FNA`y9@$Bsc z4j$QS^Wu0(;Jpwp4leOxAcPmgjPJ4R_Uxg0py?j77suoy-F+3^2AAyi9AR9t+w+%g z#2mX}64>oSK41uJ1A9na*@f&~A^Tu@L34!cVu)=YV)6Dtu?ZuvhuCEwtO{V4eW82M zmKWK_kdQs@55D;rX!M@`mUjHc70`%1?gPiK$?USn{b`3=W|uudAG!~GdrfASJ%Jy( z{|Y6Vu?ud*uVr@mQ2a_}mk-@!83DU|D7F&2j07LZi64y)zN8BK#oGeae(};k(16%a z?6O~MmDy##*d;T`e$U4qA9jkEL-DZ(rWP>C$L?e36d|OnTa{oT1XpwY+s+ppkcz0rW`D1H|+$RoOmH;#xWW!^XxQB6y!Emx) z;2zQf&Cdk&0k7jDu1IHQUjVOJBNc^U-FS%s&m|3*AXXA*%ise5yCkL%Gf3hK40tYT zKxNoI?vw#Z3G|}3=$2HNY_7hVjbs>cK3=FrTQKW%znlfL4h%g9=4oN*3BeEIZA?dK zrd4wH+ymziS)u+r3{KmKjYb%_V1sBh1{p;y@d1u%pfJGQ01C--PQxI6#|*!jCwX9m zBPjkJD<7Z$04J7%ATGdcfZ-+|hO7EuXNgnB!5`#wDO^Pa1Q&Qm5+8H%E^5vFhRY>4 z|AtF$_YJfpNTq1B40Swmf|t#U(M zt#U(Mw`u6}FO*-Pp)U?84xynh4he@?LuU^0hgd`B4jqykI(JBJ=-h1@ig;G}EE-yd zb&iI}<5j>7L_GWKdysG=(~Bvamte|-e+(8)&oYTCMH(BOhU=3&71ghtLkW>i@_iK- zH1Qh4S}abh9i>2V6ER_k2i8?I89 zM|#6m;=(Mw;VO24iQaG(xkMTxutIq7SyF*Zkdb^BqW>lFmr%y=wi6+_ct@BaIWDz~ zWYbsUC@af_SEC8Zbm5gaLNZ)9( zqLc69CP_S;p3fj7bD*V%Xd*O@y94>lkXzryD)a=40E!)2>H$? 
z-yuF4tvG@mEDwRqbZ2phPoWjE1>w#l-!U`L8x-R}dCUa#1_eXV9W(#@B32vA?wIB0 z_1FOUE`?foJ=R~o(}Mx=qr@$rouMC;6~8>-Jka;qz?qf<2atj28}jyk!4mQ7SQoyO z+4T?-#Fim;rCQ>_A$FyEH{#;ZBG^^VC5HU+pz|OOKO?bEoS%rFv9#~SzM$MssT{*T zza4h?Gv{X_%N@eLZgIBAX}86`qV~R`{(VX8`l;ATc+01XPrrPftvRiXUAtPd zTN%4{wPv+4cI|4(Y+>x$)soS|*tM%Atp(e@7f)&Nw0YUJt0k$$-R5K0&Xxoe!*nxt z?QDsY?-;vw(%>nNv1@0GUA|-N+Sw8#-!XRWJQ#IQq~HH#?Am!SLcU|{+WBeNr!0@L zYv-pn`Hrz`=fPmPBxBdkgO-EhDYU}awez4!zGLj#`HA5ZmdDt&^AnwX$Jn*=pjIx) z*tO&1fR9;8#;zS7`^$G;c0G3d`~aRHk|y1E%zggY;lqqw`-xpoutc!ym6I~Neuo6{ z_z=5(OZP{I*p=?xM}6!n=Mp=Nihbq$N<6}HzZW}BxreFT-;N#o()p!a_xrJUwWw8Q*K@6ft&CmIwG^~4 zc0JdU-@@3nvn8*Ev1?~bZVO}A&X$}O#;%<$*)5D+J6p0^7`t{J%sj~0wew)cLB^t; z2h$FUCuDZ*JeYFObHdB6od=T+x=;Anwc}s{ih+QPT{{lO$#;xhJ4lvL9%I*z19th2 zv1|K*82OH|Yy1AF{UZHjI%C)N{SopVW7qbN!aiboj9uG5vdMRhUEB5t%Ox4Rw!z1M z7&kAww(T>?cZ^-z_8Rtz$B@U^wQa9XzGLj#woki{w+ zC>~VyCgSzxX&+MP4V4k`!`uI#!N91`;`h+eYtOTaXJN;caEvOIJP6<^kud0GQZXHV zmPzzuegK7Z^jRj+PyGSVul@nh4*&wt@5KSo*Y5zbX-b!2nga=B;*A&p`r;u#8Yw3X zOCd~8X-R~!=q0AQm?@2^G*-gUL2`QnvlA9d81xBplLCt(ERwJY!lDTaBMc!?E**lGN+S~6fQ_cQaP=Rq;Y{VlFkLlNCqrj>~N9E`O8Qa z=O-iC_|`nj%Hd#~qJv8=eixG=d7MH<@;O093OHUy3gJVg;}8zb1n9UwID)mx$2n$x zz{p^H7)&1;*{9f;F|t;StQ8|`Wor>5YZcF+F^o(`Fftjz$YcZ~lM#$eMliA#wnt%P zGJ=uGSs0m&U}Q3ak;w=~CLQM1<2q631s4Xr2tv@9T$LXct`qL;D*(FE@rhbK*09z(K2ged zthJ6$1o@7&*73ic``uoqhgNEh0C?J~z*z)#Y$zjgQ@*FWKTfBq%M?Y+iv zhx?Gmt>xX^fM)s=zXMP9)~tN(-MxSP9ISgSW6}D}@9%H@^7zps;PUIyGoSv$w!I&J ze)Q-!if>Z5Yo@Ue{c7|32R{4m^tXrAhZDIg)k`-1>79Ko$4<9*oIUfk-`6hgM$YV~ zfBWZM2amL!70wFn;?b}CzKZ2;L`_`ti$A^l@z-tb{_Wz)Fa5uaX783x~6|BxiK;`XLxH5=J#^L&a0iMA zA)Az=&3t3a*?PwF7kMmg_>TUa?AvEwJOD?K8gw7u(Oms~V6CNa?$kxvQ=t*>lo$95 z**8xg-@DDZ&AY{>yrwPkU%Ax9!O~pUM}6`vE5~K4zIyWL9*iRRpRr-<0c#)MrLK#t zEex0F>l@E^@bs*|^%?&&!A*#ugzC-RMX+&-%>qcps6B12jjLDcP2NrH26^ZH4c-k7 z<=ynftCoA9Ws^pWHKxq6a`KM6!M^eF=G{k6o_}-iI`2A%^47rG$H&Xr-pYKEmzsge zd>48krCKeihEZCP~;7Ls?Cb!u4*E9C>;(AOnlrqo=f|vd{0`yfmUVf(`R1Z?(*v zmyhS>vC+~pGP81W=zM}bkv+b1{o)8dA{Zvl^C++R#`Y^bktVa}PFJ4{EjgdOb@jq9 zZCG%gIs}!_r&xMXr8gSUqhY+Pe1)X|u1y1>CUV~{Z@7@+opkZowa)DvEv#Vj*p0jk$=`v7e~ z&`<9JA)zaC0xb;^%~?i|PQb;9P8Ia;8wWn^n>3%`YVPJ{AJ-NY6) zma;UTHOq{tF$psE*q-B`V>fHSb21XT!&9(;N6l8&IJ2yo)68uG5nX@o67&S-m{@4S z>or|qw!cZx#B9`roIW${psfXSP3KNxL4ETqmo@Sl*$tYMxBRJ=uullIWHOHqDQe8D z%+5Bj8@Ojd#4s>l;%H}OG27UJ!__gGHGf9NS=L$J84%%Grgls0t>IzM(`2Ix%_7e* z&IszYAlJ_{D^Oy=+?i(67;5^EO3=2RQ_rc>f`>7Ao^m%W(-{^F9;$CzRUNaAR|_Jl z&MXJe0yfgJ;IcI*8=Fr*SWB-J)M&$_m|=~SFf%dNM)l2^yR3#*qgMUNa-`NH%dsrJ zxVwMk{J7=FZ0#F7KRDdqtF#`;?q5KA>qq?ze;L@E3@Co4$6hobi&S{i?gY77;J=b^X_JfBH z@CVTAz23p>m_qrtx^rwsS$)ri+YcYyXWb|7cDFOz!GJ6ZNiD2yAG!7D!F|Sk+C9Zc z8>0=q5mhd-=H%3Za)#2v+j zbF6b%vw-r;pUg<$-8=j{(%V;ty3RGTn(-zTmZbj<`;PS1<-v~TCQcKzQH3EXzD>U^ z`|RRCMTpj>1JR&$mU;#de4;Tq4d0I7 z480yq#RO9le};L6Rxdx((os*VXVj^(Cc*XedZO-QeM|3!>o?o$h&ooSN{dnoBCJ+g z-`aQKIz+-KhHLR!P7M^DXj2Z>VzsifZG9IZ@Q}QAwg#)=R;#or;`V_{*V)$wT;}HP^QwVxC~vPT^SwjZmupXE9IABB`WPo`DN^~Y^bB5vJ@}Xgd!@e zN@81mSqZyDP^{9X_!o1E1w~k)N}FPvQ^YC~6ygOcZOWcPMxmg9ny=EPtS_Jy2=eK9 zDs75OKAtZtsAwq8Q_sV51(bhGv?(Vl8@tYrwCCmJ@^h&T9Fg{EU73J`A=-Dc* z%2p_#IFrrKW@M?dDt70w^JI5+7B`EPsnV*{4Gk4%axys?U^XUNm7{}A8EP2}iAtN| z*Ox8fOIYcuY|6ZnbbdPfv`U*2cba>edkRDoZOW=s>{I-csw|4tN!Cfh36&NlhOF|aLKQXJYxU{OV zy{(nrs;Ee!CK3C_EzF7RywaMcj`mhoE7^Q-AHI*4h-}Q-gXx(?)lHr4t#~WDMUkC| zC(`$VjY-L?YU=7}Z4tDPb%}dvdkOH1j>1^wH1IerjB|>#J+wX4gg=>?m~)(SjApVa zFM*ms-~FG%72jq-v$U~1^YEVC#BSy;6=tTmnF01kdbUJzaQ80yF4j&Jh9Cvzt!UX`WU(u6n3>hez{#dG3$J5<@36;*l1_U+*B;BQwsWLdvsJAb<%PO(kp zkR>RN7bn{mmz27Vy$z35WoA}@nm1L?(e^?1D^ zKZX~hh*mvr35mgCWbub2rC`3u+MUsyXk4t)#snP7EU#zQV|9vSVvd;Ds?y3VEwAI$ 
zk)_dFSzBpaRNI)Nb@V!Ftzz#M<`%|gl|z@4si3B&ny8i)pGuD1NZZKWz@vOWarly4U0s!bEK$6Hxq%m@(#k|0-n}`B z9VJ+=(#m+Q=d2g3!y;8$8JqBRtaTcZDs7A?k{cj z_iqfx!Ub#bFqJlDO>A;{N!f|DYs2_q)KHZ+CZP;3Ba5R#`Jwa>l{O{>46h zORQ97W`w?kuV5v01;{20U5sI4TEPmsk4ih^?!)(CE>~$~te10_v%OV~T}-_>-kfEs zEX>rU%b3e}OI6wzr={GbEH71KmpNW+FOFvlmJItXP{H#mB^Ycq9dOTn!09Z=j%qD# z20!jEI1RFp>zjv_Ja`RJW#*J$Fe;Oyp=aWA^7=o1Rlr==9LlfCE2p_64t(=3Id~pT zDZfG$e4Qx^qZ^*QMJoNOWMgw-{z-~Y z^1C()^GrA_Cv-*Cf1KcmPK*;Y(f)CQ z>a$Hr$Y|a{?_f%c35(wsiEaV~aYVC3uIaE6)&R zD+w7-6eS(A0G5pK@cSAl4n-~$^^6mSq9aN|#_|pQ0Mlh0iVS!5`y41PMLti|JWl9| z_9_V(-6!b%m^sLhKDU8_HT|!4#J<=z|CuU$j9s zHV09J$Et6E5>Vup@R*c@q3EcR0DZzfz*L<;sm~Lj)Q~>s#tA)9qLKi8g8wx(8)P!f zQFIq5KZ@KuQV7H959%B8`)}O7beurnBx}vl*2{MuJbwPo^E+E9^C%xY5^6`T-+%b* z+vn8hWD5rK86SjO$_6gqee~rw;DIBbOo!>a-}J+>IxgP0`}sG|zv6zSSO=`%2d~_D z^!4+vzIw)grl_Jq8v2E8NVsn!$7mt;&~JP|;k|1qFwUkbj!KPN}f zj*ZkW1fLU+$%iOJ5dU23G4=?IFKl7*SnvpcsAxnb9tj>&ACR{o7LtNL6g+@QEI%Nz z2U_=udl1(MJD=Pa+@swkFEC-&k+>(g%ebSk2NJnjS+^B?fYiQCyG6EP5RXXQ7TjWd zMqW1t8oMR z{~G-&dF{$o{Ho$0aQIglR}@#TP_K~f1Wb5nT){76S7?{Xi}T=2aRr=`REWdAM8Bli z3=IAy+C}p6B{Ydq4yU|}^b3m17pNDc7cYzqpcMg}3ycvGPN*Y_izC>GVioWNBlxg< zALQfaJ)~AU;SA zH~^as9z$|uc#weKA@n;C4_qF23dzCY0Rn!{58wlMKUs~zv@Umm=qHDV!0AU0oQGec zkL)%9KD(dVM-Ghi5q+}3fxf<8S}z2)LN38RIxMCi=*3};{ZKEpm*{~&QmCaD>yh`3 z^!DIAr1Xm%$CJF%Sa>cC(FmqFOGh1M?XZEv#x z5vv1lCwu$b(QkJ<(N1fVws*I+tp^dKo!UkYcD7+{@{ayi_>a*l?dWQ4%>fy$jn)cP zLGPRF>3}{v(K6c6+0xQz0D-uyR;-2W?`eVG*1i^63+)`)*>(;)r&s|(MhkvU-a6C@ z=VW*LIRf>)yIPyEX81_-DD-ngGuhn-r{uZbW}=zdM0PbdVNGN;8)nw2&9o+RunnDe zwL+(w)=2g=fNzFuumLHvN#58xh)$b(8|jU-2C}Ie95ad-kkK3Q2C}EK0c#*TTN^fUVty3IEwbtWxAJ3dWTUXcC3_MmX z*;5Bj8LWmZQ-?5SqK>E~oBP2Rqv)xn)d?|($ zYNeoptgI-fmqR2qO!!s^%CR!Cvb>Ba!^-7lMLH-rq`Zt@hL@5RWu??oMR_S!s>N zR*IL9r4=RA5=B`FUZU6zJdF~vytJ4oCQD0-@nSNI2{46~s1+;9il{}3k|LsrESwKK zr{<9brFn2(Sdd4{!*XHMg-H}15M*&dE}l!~73ETMi5$f)glD-}jy$&{HwVul z3v+Xb9BMXsnvd`-hsY+23bOHRG8g?avJ^H5&$6jm@|=?FEIdnAn3I*2$;^b81$dT4 z%Ovv)GVx4VZhmHF1|vh3opG2z=#xp$kY^TWW#Ac#!VGE#RU*yEm82_IcV*xbGB;m> zesd&*gqALq8Hs2>{Hk&+3CDfr@(+y4P8`RMlLuJ{&rZ;elZ6s=o}CWA z>|YR7$n*j@mz~HxcI+tosO)&o(W6JOBd{wEM0*`$93^uzk77sV znWv5(;T(~l%s2vWBQgO&8uQ4rfb}zo!)+Nli^ZnR>U~D zPWyycC))>yJIb<79LrgY@XGtZnf`&E>gC%mfY=lke;&N`l&dWe4rR)YXuWx-t{FBx`#Ok^XxIw&|mO;OB3 zcy&OK0*)+SgjXra{A9*{ay!B+{(e>xxevB4q41J8`xHxn;Oyfi!Z$pF!b%kEC3o`> zQ0?XJQCI-M*~3qO?#gruJ3+9UjG+T8F?S1g;X4&mfu!#e?4-uS3zYwJ5FH*q+)v%-~cRxwdA^pwY0TZ7z7>xU;#l!MTFsDYoo$K@lbMISSS%n4IzV=2(Utl5OQ5)2p&R)MZhm(jba)CtPtuNd1zF~8f=X; zWL?ObHNo^?`I_Lx2(#AEg30iRU_4kBwl+98h!G?U2@)wWE0`W64~|+Bgu#E2L93ao zr9ok<1N8uWh#=-_GAwd64!O zB#OWQRsb2iE&vaZg#`r!_%r>nAk3fi<08BYp!<^%A^w=ZEHK>PAG{G{@ak3AD)$_4Ewvw|_zQQ+T1+fB*E(lhVp+S(xl??J< z!CHa)kbXfvgb(Sigr^lBeb@W?_^b@|VfhftN&kT5@aqkk+$l^SG9Y|8www&|Th3Tc z^CkmD-c)Z;U57$nj(LxUu7+*pL%r$VjAdl-ie=Pg(3gPDmN&laqxbq1%a#TB0gt&< zzASXvQtDFDlL62}EF+eV`mKdjvtdhFOQ~L@cd!@arkxBT`qGbHApu@qtNgqeUUW}# zWq>Emlk}JmBAVClo?cPjo?c;Io^(%!2f516gX)2~lgMgKK|ta|78RnIenxH#@HN<28tL*`7&tcGgY!Y4W9YPR_~rZe z_cu-xKms@9e_-*oCRs+eeEohDoE@X`Zx2l6Q2}m{V}z@zKR2}O@tY3{@Onsp8F1HP z1BUz?RcyTCBSU79tME%(a0%r=*7=6SiUE{UttEdOryx3zL&yA57JjH zo|TA^hX<-dp1E~*Hr=qHj5c#NPE}u3| zzHjr=W&vq1CL4;Kgt|)xW76A2#!wep+a`V4Y_Gu>C*|F1^f*9H8k1f5gPpm`$F9F{DPNuO5E0?n1=Z!L>Jb6BMcnr}dw!y0&a@R`P- zxsrU{FdsCB^{ivEUSCigQPLaxk>Watd|ovh6jzd?txll04y-GbUfhHf*GA;yToX`S zN&eKZ02J4T1(33Se^4Az(p!g+;#!D&Sz`u@D@n5585D=*`=EF}Ulz;+#g*i5 zjY3dd6BbuWhk}se8i>4?h7?zluWQUeaSd3UDeH0v#StaF5r-64N96M&Q&3z z#noXwr}V-aq_`TW_B&^?1?=|@ks#R2k2Jonk?H)|)lBFl!z2`4*UZ5IMQ2R6pmVe! 
z`e90b6}^gIsY7|EF~fS1EhHc_p2uaW>rXSA9#x4t(BKRJ_afKOVlm1-W-@yc9UOKu z=i0*5u|_#u#?Uu&PzFs*pA9~^Ni*h5f&MnXOa~%2G>xt8tu4%EPMgbMsq0OhHR%ZS zyalC-5_r___(m2G)iK9(x|urEIK_CnH+ZTAC0Ma8U3}m3Bu_2YKX)t}ZDNJE$ zfaEPz)7a8_0c1%tnoY3OAhC!}0rbcPMS7^hc?&^eY&4s};A>BwVX(OfgUMm6Ko4Sy z^k;!=w((RGEh;z)r%qp3Sipylw;ttpn$`@V5E5cd#ioA5S!%k5#s>MCAovjz7|sO& zBzc=jFaRW`X`Rf^&*MV}T%Yoeqi;GcGv?7a8u~^CJ}?hV12PD%CCr!l|;4C%L%vQ_BvJ6mdGnE2^>2w-^%M?v<7C#Hm1PMcH z8j=`IGE#%uv<(cbGr5`643H3NlV>X>lk}iA4Lw8M40Z-xG6~+5p0QG(Z-mh}kS0ZN zT%v~fOs%OXzujcLDJ%j|PhWH8IG?IE$rPHHtZ$%>Gq{isd&W2)*5CyqSx+01 z5nSylAk)^=;zFVXNHjh}!jZ@eZs!4-y@%kaDM+TFP9YeOHJpcJJ-s;~dk0_JWCLAo z4K)oaxUsov++7luM3#TAs1`}#8-{EEkbqE~iP0dfEq4);cAuAYBPqrd=;%Q<0W|@n z89=FJG$l-lH1B@77devhCtsdL1;C2(Ab}&KEr9eY zkSv7-!@duhj|VbKF5ka5043hRouf>Vkkt`uLdpaWUOr~nRWc9fYYA(Ie6(=f8Un6uVnMeGqtL1kkH zDu5AmxE@{}lRtU(Og*(}9%1`aPvw=?we()P0Y>^I&d}0gvJ7ThOw*iX8xXtqz_HB2 z>V__G2tWA;$T8;knkE*edYbc=u8Y}~oOU{=w7#`x7@~FGy?cwUpJF~$W4dF&dhzzX z2bGt03|@Ww?W=e19@iu*Z^1<0D=1>qwq5(vPUn`^oa?@N_t|%^UiO@T@>@{>b+r(b z>H6UDto({IT^DbE{_NY!1xdRO?-9+_*MtJ!;|pVUCLKPRRb1OSbp77vS2L4#rX7y7 zo2&(iVkocOlGD=jPzczK>lItgDA-2N8MvcQKb>soXARK@6}ad0T%CNz=#bb6YzgJgb?r(RO0`x1ZDzzw<`P>?WXVM z?gGgK^q;$nx{JS4CHU*@Bw!4v3jQ^>#>Yc60JdEf{4d_Ff%p@^-*7t)F#`IK%;vA) zuO1h-jlT_xRR#ZkvHVy(Miu;<#c*S&(W>BIGnySu7psE*O=2~~p8)@h#`zQAf6_RA z0{ruVj{y5G@XzxX3lM(-{Lc~d#fsf3;a?qza#X=TXN#CCCU@4Wgnwrw%Sl%W|C1n= zLB6wLhbs7IBUw&voGSR|1c_N<*^YC&52=KIb0p0!7AsB11o#(-nPTaVmVM${!1wom zg?|>3W>t%Kw5dXWmWP-w9*t`|Bu-PxC!qgiB+oo67Uy@ULVt#sCjJ=LeoP$Og5(p> zpS1?u4IeXFqQ%F$>Q$n@P)vw_kL!?##r0h|s?eX03S@M}i4%Kzj;lg{h8P!ri0vqd zNviD{XoM0k{)+yLWnyu3Zg1mYcTSj{qY^p)0{%0ORQI2++u>rjGWl=7zw78q8<$Og z5B`RB0ZIRF!2dz%j=u;0@7hwLVs=OT4fu~pwrxs`T{=UJqZ0h%Hzg(pFVN;QRDplE zqk({<3jD#2|DS{ZR8OWS&qF2nyLiw&`0lE}U+7MC7r23QOw}M@h8ylCaK&9zSpa=k z%vIn*IIFY)4$gdM+7eYZVCE9;5{8p18(`qXabh~EvH{wQ9hr{o#j0$;yv3Zwj76$! zz?4PoMN9{kHbBFH>A<#!VZ($WfaXGbnmyM}l?9k#N44YGs*#I+ZzBO*8$_D6LajmG9s%!wyl5I&Bs;~jP`9d|spI`%w#`zO$fZ8~Jf(>8; zUzH7*B@`h31RJ0$~=3+s?rANAZbXHFa${_*Z`)GDU{kJxC*zavH^4? 
zT_YCSC91Lk%xOZpaMWh+QsHu?e1Z+&BYE(4OJPKkDjPr*(u5yv5`Bf%dyssB4WOF| zslt!Jd#r@M$#E)efS!;L{=P6NNGRNv5~|7uFi?Tj$u>gg0|$as*#N2#7k;qb7jC_D z_wj-(RW^VIBW%maLr1F1lRO|(f{Fov=9-ksn)EeuW-Q*P+6D;Ax7kg#2>*KaR9Js)s`44t!iuD;BCk#c$l!T1!8}<>F;A<*leVzfOMUh*G+Q$h!QIe8Cu|B_y znScz%`aA+k8|hQ4BxKr1A7fyFK7sGXDa=lE3n(3=Poa{K2}GO7u^GTZ`uqTtF4E`X zIAJ6@sU$$3wNhCAuM7IP_`Lv1k0PHh>K-TbAxH~xWb_rmzd`UAP`IP$DNy>b*P^Il zoB(~41XAZ`*g!xZw6+u72g(4cQ=%ls)iDAVsN?^3%m{W#lv{~zj1yBu=}H3B375mp z1CyX3SHB;CnvB#L8Yd=+4k!tv4vKD@tW@U(P*adPZR3QtC|*g(sLNM>fV~T*DAjoa zlp*AegO{og+m(Iy8K4o;Ne;Ufe3ZR@)OvCU0w@{b>HjVGpnsJqe);-*Zqm9{qGgVN zJ@SR3KG-Gr=bs;6sY;6u60LAqXa>nQ3@Ngyq7(%zEq+ISce5pP&-&GY-cHu$C?&Q4 z+^fH#F+82AxjN)7?2`B6FJBDTp4zz~BE-+#0oEWIgS$sp2fU7-gTn!mz(W33O)VW= z{mIKW#6KB>jqmUE6d&0o-Wn0;4GZe5E#^WdYS@&(7&dacg%Kz@Fl3E|9PyCP+IZ%y z*>mioc%iY!0#o2ku5+}#SkYN(m7~cRl5jbpgm6>kmSO_6u zu9J(q=d$I#q1%&_cExUqideJSUxf0CF83bKbm`{myaa-+Kf@W|eA3pRGSzt291F{Z z_Kwc(Udvam3Jl$_b#qjBXz=R50DnJpCAttYWV*S)bv(*TxJ7V2X(L@tX3d$0RJC91 zxWw7T)y)lNE^s%@^;al5uA{x3Ex3(e;Z$f$O+y04~=+bp@9VZG_MXem|Y^|*9sev^>oC;ZYO+`gH ztsJ5{v?z+o3TlOPsJ`M%YXwokDVJCER+LlA@iN6RkkZOW>xRn9yQ{&C%PfNk8SvRs zOUWVwa9QI}NZQ<4R@+fVFJqOGO)cQFrI)}AC+y2nR!Wpgdm2kSt00nsQzC0?0H-aZ z7)F4a6l#gIbhxszvxF$&6_eey#nfVI5t+>f-!`>aTGwCPRs%(uMe@?V@*=#5QK+y6 zk)jxAS!-KacM-jaT_~;UD=I83U>3mqEL>M88>l~5TSzP96v)nW6cpsM^2uX3IKzno zS;@un-U4a?FQ05V2ay#F2&zD-%!qth-9STqJ|mxO z3~7FUMQ&~mI|m}XK$0sT=xxZQ<#Kc6W&MTO+-y>!4j(+8BP+OcJ|99Y_}OG#UpA4= z&VmV35M;}1E}zS0!a2k=Wno#|ObAQ>ewMuDQhpX4&gFe)GBbD?tgic1nLiM(kb1M&8c^ZEvW zNA`4qgO)t42f3YOl9Fr95>1Ibd$>`eE`cC6#Ou9Yg?MEnRfx9@@rrKV%tO4U3;Dn! zkF!wXr{1fVQGNZxAb~dloU0dlQK{UEU5FKhSS`1j5UcufBVxrPR^9Ci#A?4%4t2>h zDt!>M^G+6Gj$Fxsn;Xq4tgT$H4}9{{WyCJHHVnSm-!clS&K4#3I@+is(cK;-I&-ZJ zysN+DRW#Qe3tO=!X}^~(%=c5s`)8H)U9SP}=KJij#-_X&uceWt{THw2`P(m0mMD>E zNM4rTyp|)8WEG!nuS)iF4LsI;{_@Rx4?b^*aI$5?-f`r;3{+&y&p#_fBbKmGQfZ{EIseKXE} zIn3rmQUtiS=Q$N^0}ZJGZh=SI`z~C)b?4z1&tJZJ_v4SRU%maJ#BGH$3ze*uuqDs3 zOPhzfayPiI*xuMRbm{u7dyk)d`~8n^UjO*|&G&uB*ZPL|S!%(JD0097{3^4!VPL2( z!Fy>$VSC^3l^eGneE!w9KfM1D`|+38Z*CQD6NLnOSQuzA;dRm^U*^~MUg|g*1#7hM}Vf1Fv$T*?5VzTS>P#5@J#D*q_1wOdUTd?Px(6>_xdC_g)r%kA{`lt2 z`-j!LgVwEIVL4eF&DedN5#r&ORB^WJ{KYHK`j^jN{_y7Qn|I%J9*GR!8tgP(7aE^T zd9e`|vch7=YquUgdiKpf-$Ic$*YaaSqocg%>T9r|8~J_hxt?KY=JT&!yn6fg-Mc6C z$>CdLf@}?SAd5ET)xg!;@O=OABZ&Svn7(;q{Cc;Udg?UTz31(NXD?o(EAEu+joKOO zH{U>$1)HAy1V!I})0(z%$L@&5Mmpdook#ig^`)%%n4}o*S$Y~Ym^S`hynj!y)g)~; z>|Y{vhUbJsWt=*gp*r}yvAKCqv^U!Ih8Fo}@_p$q_+`$vNX#x7zfB!>sVZdrcn z$wRx?yXZT~G|5idPFg&<8KEb>Yjo$H@}yj}a*7#Go;(px#4~n~+b4lcu`?cvm!%dS zI~>o5XYU}h5AVQsu(t#50Z{IcrAzYSchG?$-zM)8O;Ms#I{T0c2<ywd(_`o{+-UjE!X)rWLTC@bWwbo5Dj}L0 z%@@m)i{cU68L{E4GFq%DmLD&SM$9lJ6N1>pTHs*J0!JsgMGL{BIHC3=Di>FBOsp=3 z3~z{+TbqJ-Nu|k%w+iug*VXS()>jq}JaQ8q{(9v(HC0fbJijDbtRYsIB34aVE-JRY zA`|{ocq7)SGsh7trSd3Zg(6mJLlR=8S0zDZ^60~kh?&v24KYir-;rksa8M%i-ZS7aX)+pE0-DXWiMOyYik(WvJwcEwwZTr%T(#0;;%MN}* z(i)LDZ089`dJf&-%iQ|Ydykjo?-N-&ZT=II4z9I;jqL&XD&S^bMDMGuEkC)=-hOq# ze?sZi^``dPs8}Z4-t!%)1$6~G+!ii9Fab&*=U6VXW}uP>#cc7jxa92ms>1;bmux=! 
zS1A2QXUa;aDOlygEmA5TIujvut4?K*XNH%MQmT^I#ms>1Sk!Qi}jtW$zuSN z%Ix$Z+y-{AB9usqc~IR)*Z8};hkCjb?i@E+h?kq2E6o)k3wo#SZiL&Y`#Nv8o^{%cVp5R^Nxj;~nGt-$|!a?}x^3iXN%hJ^@F5p`vSFT#Z zTB0ySeCN?nU*}*?=Oz3lawq>4PAn&g34|``5~*{Pn|rVm-wAgl*LXRy97%f=fkAbW zdPg_~csX(%iN*3IVJ?f=ixt{P;3x|SbPidpzF6+Lb}_gQ$02sHG-COxWs4W_7hw+M z@*oGc18Hvs#fe4IC7WEs7CG=72z%1s&z@7rLW=pq$I68o3JDF`n=t^6*EyG6c60yjJuwgDFt?39w zZKXDgHb>dpun@~TY$36bZLQEnXt_}Cxy5fG6V4&t%Nn!hS}9Br-&($Ev%NLlnqwui z4qt9%$+INq|MYrNw${ixzz^4bPLjf)=$}cfwlRDQ2OK|GPKZxlEg_gCK7++{+6~j~$6A(Vo+%d*u8lhK{V@Oyi(p!Ekra z7x`^V+Z$*OZB4+?2gme{XP{;kUP89;nXQlbNZRH_hGwA?F!W{fHapXW3-r;WP}qVu z@|Cq`RH`I#x$$)O%!wF!`H0UfYrEOHG%AiTRQRQ(OW6MGBM}P>7bZ-=(5I#A7YH4N zMr?3wX~3uR!{Wd_IaxcLjm*|egwP*4_PAI$FPa5W8#sc{xXhB&Ri@M3PECN&Tawia z9NmPInN$>3_I#7Ik!9=z2>rG$+QEL=A`=d@J%#eZZz6)8Kj^d2+jD^~YHuQfKF*7@ z_wsd|&LRjv(AR(JjsBx`r@OPim$^E4`DXp^&gj*nYn%dC*&0y6z4r+~pH*yi5rulq z94yY5^hqL8TuJ_rIa8lGPQExg3+{mhHH<1+x6agLjI-C8I!^6(oxq#d+aUw?%BY}?ZG0E*P{6|jr24)3Qt{3f# z^b;*dRT*gUm^2K0di=4s(pL|g4sTs8@^!Z}H=d%S#)hh3Dc;x{#ZO-irtb>#6L~FO z06svd3R;H+Ib&}>e0QTXc|(9`xwF;m>65g1EE*2c_Qcp**{g@m$6|w_E_-tmLtS+? z9rYYgO+#6Gpz)=S3ua9-7{3oQ%1@PtHV2A)oUP6NxCI!7p=;-ve9~M9t2<{+|D>%o zMLMn>?1RZ+G6)C+lhUYEf`H&69D~3Z9YPz^!Za}rOdV5u1EE(uE`%RJ1QUucqCzng zhWgNW1|+zjHf^erk>QjnlP6Cy(AU$|)zQ|{)X-3ekSGXgLRXAm09bTqzQw$G=E&DJ zdzPuGiHY%y>2R&#l<^Cd*FhlGA6KDP;0Z!arWpfT7k z4v~(iA-Fr`%^?-hBl&b|%I5)h23CF6mr(4}Q!s+(X8yp zdamffDxVQ*okl~i45LKAR*aM-pAPsBTRwhJ1gkbgZ&<%`%eL65K9TWa5TU&tmH+uELMIC|NS1Up>O!#?*ZZBfAL`)7ytV`jN8cneh>fs y9{${4`tSGfr|$u5#-E?f{|ODaGKP--+rC!{B?EO~>pmTbUiIlf^z2Uu<$nQOKjGv6 literal 0 HcmV?d00001 diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py new file mode 100644 index 00000000..55c9853e --- /dev/null +++ b/calibre-plugin/jobs.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Jim Miller' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +import time, os, traceback + +from ConfigParser import SafeConfigParser +from StringIO import StringIO + +from calibre.utils.ipc.server import Server +from calibre.utils.ipc.job import ParallelJob +from calibre.utils.logging import Log + +from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload, + OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY) +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions +from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data + +# ------------------------------------------------------------------------------ +# +# Functions to perform downloads using worker jobs +# +# ------------------------------------------------------------------------------ + +def do_download_worker(book_list, options, + cpus, notification=lambda x,y:x): + ''' + Master job, to launch child jobs to extract ISBN for a set of books + This is run as a worker job in the background to keep the UI more + responsive and get around the memory leak issues as it will launch + a child job for each book as a worker process + ''' + server = Server(pool_size=cpus) + + print(options['version']) + total = 0 + # Queue all the jobs + print("Adding jobs for URLs:") + for book in book_list: + if book['good']: + print("%s"%book['url']) + total += 1 + args = ['calibre_plugins.fanfictiondownloader_plugin.jobs', + 'do_download_for_worker', + 
                    (book,options)]
+        job = ParallelJob('arbitrary',
+                          "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
+                          done=None,
+                          args=args)
+        job._book = book
+        # job._book_id = book_id
+        # job._title = title
+        # job._modified_date = modified_date
+        # job._existing_isbn = existing_isbn
+        server.add_job(job)
+
+    # This server is an arbitrary_n job, so there is a notifier available.
+    # Set the % complete to a small number to avoid the 'unavailable' indicator
+    notification(0.01, 'Downloading FanFiction Stories')
+
+    # dequeue the job results as they arrive, saving the results
+    count = 0
+    while True:
+        job = server.changed_jobs_queue.get()
+        # A job can 'change' when it is not finished, for example if it
+        # produces a notification. Ignore these.
+        job.update()
+        if not job.is_finished:
+            continue
+        # A job really finished. Get the information.
+        output_book = job.result
+        #print("output_book:%s"%output_book)
+        book_list.remove(job._book)
+        book_list.append(job.result)
+        book_id = job._book['calibre_id']
+        #title = job._title
+        count = count + 1
+        notification(float(count)/total, 'Downloaded Story')
+        # Add this job's output to the current log
+        print('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
+        print(job.details)
+
+        if count >= total:
+            # All done!
+            break
+
+    server.close()
+
+    # return the book list as the job result
+    return book_list
+
+def do_download_for_worker(book,options):
+    '''
+    Child job, to perform the download for this specific book,
+    when run as a worker job
+    '''
+    try:
+        book['comment'] = 'Download started...'
+
+        ffdlconfig = SafeConfigParser()
+        ffdlconfig.readfp(StringIO(get_resources("plugin-defaults.ini")))
+        ffdlconfig.readfp(StringIO(options['personal.ini']))
+
+        adapter = adapters.getAdapter(ffdlconfig,book['url'],options['fileform'])
+        adapter.is_adult = book['is_adult']
+        adapter.username = book['username']
+        adapter.password = book['password']
+
+        story = adapter.getStoryMetadataOnly()
+        writer = writers.getWriter(options['fileform'],adapter.config,adapter)
+
+        outfile = book['outfile']
+
+        ## No need to download at all.  Shouldn't ever get down here.
+        if options['collision'] in (CALIBREONLY,):
+            print("Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening...")
+            book['comment'] = 'Metadata collected.'
+
+        ## checks were done earlier, it's new or not dup or newer--just write it.
+        elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
+                ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)):
+            print("write to %s"%outfile)
+            writer.writeStory(outfilename=outfile, forceOverwrite=True)
+            book['comment'] = 'Download %s completed, %s chapters.'%(options['fileform'],story.getMetadata("numChapters"))
+
+        ## checks were done earlier, just update it.
+        elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
+
+            # update now handled by pre-populating the old images and
+            # chapters in the adapter rather than merging epubs.
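+            # get_update_data() (imported from epubutils above) reads the
+            # existing epub and returns its story url, chapter count, and the
+            # already-downloaded chapter and image contents; seeding them into
+            # adapter.oldchapters/adapter.oldimgs lets the adapter skip
+            # re-fetching chapters it already has.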
+ urlchaptercount = int(story.getMetadata('numChapters')) + (url,chaptercount, + adapter.oldchapters, + adapter.oldimgs) = get_update_data(book['epub_for_update']) + + print("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)) + print("write to %s"%outfile) + + writer.writeStory(outfilename=outfile, forceOverwrite=True) + + book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\ + (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) + + except NotGoingToDownload as d: + book['good']=False + book['comment']=unicode(d) + book['icon'] = d.icon + + except Exception as e: + book['good']=False + book['comment']=unicode(e) + book['icon']='dialog_error.png' + print("Exception: %s:%s"%(book,unicode(e))) + traceback.print_exc() + + #time.sleep(10) + return book diff --git a/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt b/calibre-plugin/plugin-import-name-fanfictiondownloader_plugin.txt new file mode 100644 index 00000000..e69de29b diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 00000000..e72999f4 --- /dev/null +++ b/cron.yaml @@ -0,0 +1,10 @@ +cron: +- description: cleanup job + url: /r3m0v3r + schedule: every 2 hours + +# There's a bug in the Python 2.7 runtime that prevents this from +# working properly. In theory, there should never be orphans anyway. +#- description: orphan cleanup job +# url: /r3m0v3rOrphans +# schedule: every 4 hours diff --git a/css/index.css b/css/index.css new file mode 100644 index 00000000..eae546b7 --- /dev/null +++ b/css/index.css @@ -0,0 +1,73 @@ +body +{ + font: 0.9em "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif; +} + +#main +{ + width: 60%; + margin-left: 20%; + background-color: #dae6ff; + padding: 2em; +} + +#greeting +{ +# margin-bottom: 1em; + border-color: #efefef; +} + + + +#logpassword:hover, #logpasswordtable:hover, #urlbox:hover, #typebox:hover, #helpbox:hover, #yourfile:hover +{ + border: thin solid #fffeff; +} + +h1 +{ + text-decoration: none; +} + +#logpasswordtable +{ + padding: 1em; +} + +#logpassword, #logpasswordtable { +// display: none; +} + +#urlbox, #typebox, #logpasswordtable, #logpassword, #helpbox, #yourfile +{ + margin: 1em; + padding: 1em; + border: thin dotted #fffeff; +} + +div.field +{ + margin-bottom: 0.5em; +} + +#submitbtn +{ + padding: 1em; +} + +#typelabel +{ +} + +#typeoptions +{ + margin-top: 0.5em; +} + +#error +{ + color: #f00; +} +.recent { + font-size: large; +} diff --git a/defaults.ini b/defaults.ini new file mode 100644 index 00000000..3f9d489f --- /dev/null +++ b/defaults.ini @@ -0,0 +1,519 @@ +# Copyright 2012 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +[defaults] + +## [defaults] section applies to all formats and sites but may be +## overridden at several levels + +## All available titlepage_entries and the label used for them: +## _label:
[Template hunk: an "Edit Config" page whose HTML markup was lost in extraction; the surviving template text follows.]

  FanFictionDownLoader

  Edit Config

  Editing configuration for {{ nickname }}.

  Default System configuration

  {{ defaultsini }}

  Powered by Google App Engine
  This is a web front-end to FanFictionDownLoader
  Copyright © Fanficdownloader team
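do_download_for_worker above builds one SafeConfigParser and feeds it plugin-defaults.ini first and the user's personal.ini second; ConfigParser keeps the last value read, which is what lets a personal section shadow the system defaults (example.ini below relies on the same layering). A minimal sketch of that layering, plus the ${...} filename placeholders; whether FFDL expands those with string.Template internally is an assumption, but the syntax matches:

# -*- coding: utf-8 -*-
# Later readfp() calls override earlier ones, option by option.
from ConfigParser import SafeConfigParser  # Python 2, as in the worker
from StringIO import StringIO
from string import Template

defaults = "[defaults]\nzip_output: false\nis_adult: false\n"
personal = "[defaults]\nis_adult: true\n"

config = SafeConfigParser()
config.readfp(StringIO(defaults))   # system defaults first
config.readfp(StringIO(personal))   # personal.ini wins where it sets a value
print(config.get('defaults', 'is_adult'))    # -> true  (from personal)
print(config.get('defaults', 'zip_output'))  # -> false (from defaults)

# The ${...} placeholders in output_filename look like string.Template:
fn = Template("books/${title}-${siteabbrev}_${storyId}${formatext}")
print(fn.substitute(title='Example', siteabbrev='ffnet',
                    storyId='12345', formatext='.epub'))
# -> books/Example-ffnet_12345.epub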
diff --git a/epubmerge.py b/epubmerge.py
new file mode 100644
index 00000000..f7e76b8c
--- /dev/null
+++ b/epubmerge.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# epubmerge.py 1.0
+
+# Copyright 2011, Jim Miller
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if __name__ == "__main__":
+    print('''
+This utility has been split out into its own project.
+See: http://code.google.com/p/epubmerge/
+...for a CLI epubmerge.py program and calibre plugin.
+''')
diff --git a/example.ini b/example.ini
new file mode 100644
index 00000000..67392708
--- /dev/null
+++ b/example.ini
@@ -0,0 +1,40 @@
+## This is an example of what your personal configuration might look
+## like.
+
+[defaults]
+## Some sites also require the user to confirm they are adult for
+## adult content. In the commandline version, this should go in your
+## personal.ini, not defaults.ini.
+#is_adult:true
+
+## Most commonly, I expect, this will be used to save usernames/passwords
+## for different sites.
+[www.twilighted.net]
+#username:YourPenname
+#password:YourPassword
+
+[www.ficwad.com]
+#username:YourUsername
+#password:YourPassword
+
+[www.adastrafanfic.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content.
+#is_adult:true
+
+## The [defaults] section here will override the system [defaults],
+## but not the format, site, or site:format sections.
+[defaults]
+## Directories are only useful in commandline or zip files.
+#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
+#output_filename: books/${site}/${authorId}/${title}-${storyId}${formatext}
+
+## For example, zip_output here will turn on zip for html and txt, but
+## not epub because the system [epub] section explicitly says
+## zip_output: false (epubs *are* specially formatted zip files.)
+#zip_output: true
+#zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
+
+## This section will override anything in the system defaults or other
+## sections here.
+[overrides]
diff --git a/fanficdownloader/BeautifulSoup.py b/fanficdownloader/BeautifulSoup.py
new file mode 100644
index 00000000..4b17b853
--- /dev/null
+++ b/fanficdownloader/BeautifulSoup.py
@@ -0,0 +1,2014 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses a (possibly invalid) XML or HTML document into a
+tree representation. It provides methods and Pythonic idioms that make
+it easy to navigate, search, and modify the tree.
+
+A well-formed XML/HTML document yields a well-formed data
+structure. An ill-formed XML/HTML document yields a correspondingly
+ill-formed data structure. If your document is only locally
+well-formed, you can use this library to find and process the
+well-formed part of it.
+
+Beautiful Soup works with Python 2.2 and up.
It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
+ +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." 
+ lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, 
name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. + r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
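+    # A sketch of typical use (an assumption, not from this file's docs):
+    # given a parsed `soup`, each generator walks one relationship until
+    # it runs off the document, yielding None last:
+    #
+    #   tag = soup.find('p')
+    #   for el in tag.nextGenerator():
+    #       if el is None:
+    #           break
+    #       # ... el visits every element after `tag` in document order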
+ def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. 
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. + convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. 
This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+ elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "" actually means + "". + + [Another possible explanation is "", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile(']*)>'), + lambda x: '') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. 
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + +
      (No space between name of closing tag and tag close) + (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. 
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +

         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>
      ** should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print " is not real!" % name + self.handle_data('' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." 
+ if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a

      <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+      should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurance
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+      should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+      should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+      but,
+       <tr>Blah<table><tr>Blah
+      should NOT be transformed into
+       <tr>Blah</tr><table><tr>
      Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+ # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + + + + + + + + +
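Taken together, the NESTABLE/RESET tag tables and the docstrings above define how this parser repairs tag soup. A small sketch of the behaviour they describe; the import assumes the module is importable as BeautifulSoup, as with the copies added in this patch, and the expected outputs are taken from the docstring examples rather than re-verified:

# -*- coding: utf-8 -*-
from BeautifulSoup import BeautifulSoup

# <p> is in NON_NESTABLE_BLOCK_TAGS: a second <p> closes the first.
print(BeautifulSoup('<p>Para1<p>Para2'))
# expected: <p>Para1</p><p>Para2</p>

# <tr> nesting resets within the same <table>.
print(BeautifulSoup('<table><tr>Blah<tr>Blah</table>'))
# expected: <table><tr>Blah</tr><tr>Blah</tr></table>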
      +

      + FanFictionDownLoader +

      + + +
      +
      + Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites much easier. Please paste a URL of the first chapter in the box to start. Alternatively, see your personal list of previously downloaded fanfics. +
      + +
      + Ebook format   +
      + +
      + +
      + + + +
      + + + +
      +
      + +

      + Login and Password +

      +
      + If the story requires a login and password to download (e.g. marked as Mature on FFA), you may need to provide your credentials to download it, otherwise just leave it empty +
      +
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      +
      +
      + + +
      + + +
      + +
      +
+ A few things to know that will make your life substantially easier: +
+
+ 1. Small post written by me: how to read fiction in Stanza or any other ebook reader.
+ 2. Currently we support fanfiction.net, fictionpress.com, fanficauthors.net and ficwad.com.
+ 3. Paste the URL of the first chapter of the fanfic, not the index page.
+ 4. Fics with a single chapter are not supported (you can just copy and paste the text).
+ 5. Stories which are too long may not download correctly, and the application will report a time-out error; this is a limitation currently imposed by Google App Engine on long-running activities (see the task-queue sketch below).
+ 6. FicWad support is somewhat flaky; if you feel it doesn't work for you, send all the details to me.
+ 7. You can download fanfics and store them for later by downloading them and then visiting the recent downloads section, but in future they will be deleted after 5 days to save space.
+ 8. If the downloader simply opens a file download window rather than saving the fanfic and giving you a link, the file is too large to save in the database and you need to download it straight away.
+ 9. If you think that something that should work in fact doesn't, drop me an email at sigizmund@gmail.com.
      + Otherwise, just have fun, and if you want to say thank you — use the email above. +
      +
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © Fanficdownloader team +
      + +
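The time-out caveat in the list above comes from App Engine's hard deadline on user-facing requests. The later revision of the app (see main.py further down in this patch) sidesteps it by queuing each download as a background task. A minimal sketch of that call, with placeholder parameter values; main.py passes the real datastore key, format, credentials and user email:

from google.appengine.api import taskqueue

# Placeholder values for illustration only.
taskqueue.add(url='/fdowntask',
              queue_name="download",
              params={'id': 'download-key-placeholder',
                      'format': 'epub',
                      'url': 'http://www.fanfiction.net/s/2345466/3/'})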
      + + + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..678cf532 --- /dev/null +++ b/index.html @@ -0,0 +1,359 @@ + + + + + FanFictionDownLoader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + + + + + + + +
      +

      + FanFictionDownLoader +

      + +
      + + +
      + + {{yourfile}} + + + {% if authorized %} +
      +
      +
      +

      Hi, {{ nickname }}! This is FanFictionDownLoader, which makes reading stories from various websites + much easier.

      +
      + +

      New Site

      +

+ Support for the Wonderful World of MakeBelieve (WWOMB) archive at
+http://www.squidge.org/peja/cgi-bin/index.php
+has been added. This does not support other sections of
+www.squidge.org, or the other files under www.squidge.org/peja that
+aren't in the eFiction instance.
+

      +

      + Questions? Check out our + FAQs. +

      +

      + If you have any problems with this application, please + report them in + the FanFictionDownLoader Google Group. The + Previous Version is also available for you to use if necessary. +

      +
      + {{ error_message }} +
      +
      + +
      +
      URL:
      +
      +
      Ebook format
      +
      + EPub + HTML + Plain Text + Mobi(Kindle) +
      +
      +
      + +

For most readers, including Sony Reader, Nook and iPad, use EPub. (The sketch below shows how this choice selects a writer on the server.)

      +
      +
      +
      +
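Server-side, the format picked in this form selects a writer. A minimal sketch of that hand-off, using the adapters/writers calls that appear in main.py later in this patch; the config setup mirrors getUserConfig() and the URL is one of the examples listed below:

from StringIO import StringIO
import ConfigParser
from fanficdownloader import adapters, writers

# Build a config the same way getUserConfig() in main.py does.
config = ConfigParser.SafeConfigParser()
config.read('defaults.ini')

adapter = adapters.getAdapter(config, 'http://www.fanfiction.net/s/2345466/3/', 'epub')
writer = writers.getWriter('epub', config, adapter)

outbuffer = StringIO()
writer.writeStory(outbuffer)   # fetches the story through the adapter
data = outbuffer.getvalue()
outbuffer.close()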

      + Customize your User Configuration. +

      +

      + Or see your personal list of previously downloaded fanfics. +

      +

+ See a list of fanfics downloaded by all users, ordered by most popular or most recent. +

      +
      + + {% else %} +
      +
      +

+ This is FanFictionDownLoader, which makes reading stories from various websites much easier. Before you
+ can start downloading fanfics, you need to log in, so FanFictionDownLoader can remember your fanfics and store them.
+

      +

      Login using Google account

      +
      +
      + {% endif %} + +
      +

      + FanFictionDownLoader calibre Plugin +

      + + There's now a version of this downloader that runs + entirely inside the + popular calibre + ebook management package as a plugin. + +

      + + Once you have calibre installed and running, inside + calibre, you can go to 'Get plugins to enhance calibre' or + 'Get new plugins' and + install FanFictionDownLoader. + +

      +
      +
      +

      Supported sites:

      +

+ There's a Supported Sites page in our wiki. If you have a site you'd like
+ to see supported, please check there first. A sketch of how these URL
+ forms are checked appears after the list below.
+

      +
      +
      fictionalley.org
      +
      + Use the URL of the story's chapter list, such as +
      http://www.fictionalley.org/authors/drt/DA.html. +
      Or a chapter URL (or one-shot text), such as +
      http://www.fictionalley.org/authors/drt/JOTP01a.html. +
      Both will work for both chaptered and one-shot stories now. +
      +
      fanfiction.net
      +
      + Use the URL of any story chapter, with or without story title such as +
      http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo or +
      http://www.fanfiction.net/s/2345466/3/. +
      +
      fictionpress.com
      +
      + Use the URL of any story chapter, such as +
      http://www.fictionpress.com/s/2851771/1/Untouchable_Love or +
      http://www.fictionpress.com/s/2847338/6/. +
      +
      twilighted.net
      +
      + Use the URL of the start of the story, such as +
      http://twilighted.net/viewstory.php?sid=8422. +
      +
      twiwrite.net
      +
      + Use the URL of the start of the story, such as +
      http://twiwrite.net/viewstory.php?sid=427. +
      +
      ficwad.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.ficwad.com/story/74884. +
      Note that this is changed from the previous version. The system will still accept chapter URLs, however. +
      +
      harrypotterfanfiction.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.harrypotterfanfiction.com/viewstory.php?psid=289208. +
      +
      potionsandsnitches.net
      +
      + Use the URL of the story's chapter list, such as +
      http://potionsandsnitches.net/fanfiction/viewstory.php?sid=2332. +
      +
      mediaminer.org
      +
      + Use the URL of the story's chapter list, such as +
      http://www.mediaminer.org/fanfic/view_st.php/166653. +
      Or the story URL for one-shots, such as +
      http://www.mediaminer.org/fanfic/view_st.php/167618 or +
      http://www.mediaminer.org/fanfic/view_ch.php/1234123/123444#fic_c +
      +
      adastrafanfic.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.adastrafanfic.com/viewstory.php?sid=854. +
      +
      whofic.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.whofic.com/viewstory.php?sid=16334. +
      +
      thewriterscoffeeshop.com
      +
      + Use the URL of the story's chapter list, such as +
      http://www.thewriterscoffeeshop.com/library/viewstory.php?sid=2110. +
      +
      fanfiction.tenhawkpresents.com
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.tenhawkpresents.com/viewstory.php?sid=294. +
      +
      castlefans.org
      +
      + Use the URL of the story's chapter list, such as +
      http://castlefans.org/fanfic/viewstory.php?sid=123. +
      +
      fimfiction.net
      +
      + Use the URL of the story's chapter list, such as +
      http://www.fimfiction.com/story/123/ +
      or the URL of any chapter, such as +
      http://www.fimfiction.com/story/123/1/. +
      +
      tthfanfic.org
      +
      + Use the URL of any story, with or without chapter, title and notice, such as +
      http://www.tthfanfic.org/Story-5583 +
      http://www.tthfanfic.org/Story-5583/Greywizard+Marked+By+Kane.htm. +
      http://www.tthfanfic.org/T-99999999/Story-26448-15/batzulger+Willow+Rosenberg+and+the+Mind+Riders.htm. +
      +
      www.siye.co.uk
      +
      + Use the URL of the story's chapter list, such as +
      http://www.siye.co.uk/siye/viewstory.php?sid=123. +
      +
      archiveofourown.org
      +
+ Use the URL of the story, or one of its chapters, such as +
      http://archiveofourown.org/works/76366. +
      http://archiveofourown.org/works/76366/chapters/101584. +
      +
      ficbook.net(Russian)
      +
+ Use the URL of the story, or one of its chapters, such as +
      http://ficbook.net/readfic/93626. +
      http://ficbook.net/readfic/93626/246417#part_content. +
      +
      fanfiction.mugglenet.com
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.mugglenet.com/viewstory.php?sid=123. +
      +
      www.hpfandom.net
      +
      + Use the URL of the story's chapter list, such as +
      http://www.hpfandom.net/eff/viewstory.php?sid=123. +
      +
      thequidditchpitch.org
      +
      + Use the URL of the story's chapter list, such as +
      http://thequidditchpitch.org/viewstory.php?sid=123. +
      +
      fanfiction.portkey.org
      +
      + Use the URL of the story's chapter list, such as +
      http://fanfiction.portkey.org/story/123. +
      +
      nfacommunity.com
      +
      + Use the URL of the story's chapter list, such as +
      http://nfacommunity.com/viewstory.php?sid=1654. +
      +
      www.midnightwhispers.ca
      +
      + Use the URL of the story's chapter list, such as +
      http://www.midnightwhispers.ca/viewstory.php?sid=1124. +
      +
      ksarchive.com
      +
      + Use the URL of the story's chapter list, such as +
      http://ksarchive.com/viewstory.php?sid=1124. +
      +
      archive.skyehawke.com
      +
      + Use the URL of the story's summary, such as +
      http://archive.skyehawke.com/story.php?no=17466. +
      +
      www.squidge.org/peja (WWOMB)
      +
      + Use the URL of the story's summary, such as +
      http://www.squidge.org/peja/cgi-bin/viewstory.php?sid=1234. +
      +
      +
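As noted above the list, each of these URL shapes is recognized by a per-site adapter, and main.py surfaces exceptions.InvalidStoryURL when nothing matches. The regex and helper below are purely illustrative (the real checks live inside the adapters):

import re

# Illustrative pattern for fanfiction.net story URLs; not the
# adapter's actual regex.
FFNET_STORY_RE = re.compile(r'^https?://www\.fanfiction\.net/s/(\d+)(?:/(\d+))?')

def story_id(url):
    m = FFNET_STORY_RE.match(url)
    if not m:
        # The real adapters raise exceptions.InvalidStoryURL here.
        raise ValueError("Unsupported URL: " + url)
    return m.group(1)

print story_id('http://www.fanfiction.net/s/5192986/1/A_Fox_in_Tokyo')  # 5192986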

      + A few additional things to know, which will make your life substantially easier: +

      +
+
+ 1. First thing to know: we do not use your Google login and password. In fact, all we know about you is your ID; the password is verified by Google and is absolutely, totally unknown to anyone but you.
+ 2. Small post written by Roman: how to read fiction in Stanza or any other ebook reader.
+ 3. You can download fanfiction directly from your iPhone, Kindle or (possibly) another ebook reader.
+ 4. Downloaded stories are deleted after some time (which should give you enough time to download them and keeps Google happy about the app not going over its storage limit).
+ 5. If you see funny characters in a downloaded Plain Text file, make sure you open it with UTF-8 text encoding and not something else.
+ 6. If you think that something that should work in fact doesn't, post a message to our Google Group. We also encourage you to join it so you will find out about the latest updates and fixes as soon as possible.
      + Otherwise, just have fun, and if you want to say thank you — use the contacts above. +
      +
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © FanFictionDownLoader team +
      + +
      + + +
      +
      + + diff --git a/index.yaml b/index.yaml new file mode 100644 index 00000000..a55512f1 --- /dev/null +++ b/index.yaml @@ -0,0 +1,28 @@ +indexes: + +# notAUTOGENERATED + +# This index.yaml is automatically updated whenever the dev_appserver +# detects that a new type of query is run. If you want to manage the +# index.yaml file manually, remove the above marker line (the line +# saying "# AUTOGENERATED"). If you want to manage some indexes +# manually, move them above the marker line. The index.yaml file is +# automatically uploaded to the admin console when you next deploy +# your application using appcfg.py. + +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + +- kind: SavedMeta + properties: + - name: count + - name: date + direction: desc diff --git a/js/fdownloader.js b/js/fdownloader.js new file mode 100644 index 00000000..8f6ab0a8 --- /dev/null +++ b/js/fdownloader.js @@ -0,0 +1,116 @@ +var g_CurrentKey = null; +var g_Counter = 0; + +var COUNTER_MAX = 50; + + +function setErrorState(error) +{ + olderr = error; + error = error + "
      " + "Complain about this error"; + $('#error').html(error); +} + +function clearErrorState() +{ + $('#error').html(''); +} + +function showFile(data) +{ + $('#yourfile').html('' + data.name + " by " + data.author + ""); + $('#yourfile').show(); +} + +function hideFile() +{ + $('#yourfile').hide(); +} + +function checkResults() +{ + if ( g_Counter >= COUNTER_MAX ) + { + return; + } + + g_Counter+=1; + + $.getJSON('/progress', { 'key' : g_CurrentKey }, function(data) + { + if ( data.result != "Nope") + { + if ( data.result != "OK" ) + { + leaveLoadingState(); + setErrorState(data.result); + } + else + { + showFile(data); + leaveLoadingState(); + // result = data.split("|"); + // showFile(result[1], result[2], result[3]); + } + + $("#progressbar").progressbar('destroy'); + g_Counter = 101; + } + }); + + if ( g_Counter < COUNTER_MAX ) + setTimeout("checkResults()", 1000); + else + { + leaveLoadingState(); + setErrorState("Operation takes too long - terminating by timeout (story too long?)"); + } +} + +function enterLoadingState() +{ + $('#submit_button').hide(); + $('#ajax_loader').show(); +} + +function leaveLoadingState() +{ + $('#submit_button').show(); + $('#ajax_loader').hide(); +} + +function downloadFanfic() +{ + clearErrorState(); + hideFile(); + + + format = $("#format").val(); + alert(format); + + return; + + var url = $('#url').val(); + var login = $('#login').val(); + var password = $('#password').val(); + + if ( url == '' ) + { + setErrorState('URL shouldn\'t be empty'); + return; + } + + if ( (url.indexOf('fanfiction.net') == -1 && url.indexOf('fanficauthors') == -1 && url.indexOf('ficwad') == -1 && url.indexOf('fictionpress') == -1) || (url.indexOf('adultfanfiction.net') != -1) ) + { + setErrorState("This source is not yet supported. Ping me if you want it!"); + return; + } + + $.post('/submitDownload', {'url' : url, 'login' : login, 'password' : password, 'format' : format}, function(data) + { + g_CurrentKey = data; + g_Counter = 0; + setTimeout("checkResults()", 1000); + enterLoadingState(); + }) +} \ No newline at end of file diff --git a/js/jquery-1.3.2.js b/js/jquery-1.3.2.js new file mode 100644 index 00000000..92635743 --- /dev/null +++ b/js/jquery-1.3.2.js @@ -0,0 +1,4376 @@ +/*! + * jQuery JavaScript Library v1.3.2 + * http://jquery.com/ + * + * Copyright (c) 2009 John Resig + * Dual licensed under the MIT and GPL licenses. + * http://docs.jquery.com/License + * + * Date: 2009-02-19 17:34:21 -0500 (Thu, 19 Feb 2009) + * Revision: 6246 + */ +(function(){ + +var + // Will speed up references to window, and allows munging its name. + window = this, + // Will speed up references to undefined, and allows munging its name. 
+ undefined, + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + // Map over the $ in case of overwrite + _$ = window.$, + + jQuery = window.jQuery = window.$ = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context ); + }, + + // A simple way to check for HTML strings or ID strings + // (both of which we optimize for) + quickExpr = /^[^<]*(<(.|\s)+>)[^>]*$|^#([\w-]+)$/, + // Is it a simple selector + isSimple = /^.[^:#\[\.,]*$/; + +jQuery.fn = jQuery.prototype = { + init: function( selector, context ) { + // Make sure that a selection was provided + selector = selector || document; + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this[0] = selector; + this.length = 1; + this.context = selector; + return this; + } + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + var match = quickExpr.exec( selector ); + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) + selector = jQuery.clean( [ match[1] ], context ); + + // HANDLE: $("#id") + else { + var elem = document.getElementById( match[3] ); + + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem && elem.id != match[3] ) + return jQuery().find( selector ); + + // Otherwise, we inject the element directly into the jQuery object + var ret = jQuery( elem || [] ); + ret.context = document; + ret.selector = selector; + return ret; + } + + // HANDLE: $(expr, [context]) + // (which is just equivalent to: $(content).find(expr) + } else + return jQuery( context ).find( selector ); + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) + return jQuery( document ).ready( selector ); + + // Make sure that old selector state is passed along + if ( selector.selector && selector.context ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return this.setArray(jQuery.isArray( selector ) ? + selector : + jQuery.makeArray(selector)); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.3.2", + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num === undefined ? + + // Return a 'clean' array + Array.prototype.slice.call( this ) : + + // Return just the object + this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = jQuery( elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) + ret.selector = this.selector + (this.selector ? " " : "") + selector; + else if ( name ) + ret.selector = this.selector + "." 
+ name + "(" + selector + ")"; + + // Return the newly-formed element set + return ret; + }, + + // Force the current matched set of elements to become + // the specified array of elements (destroying the stack in the process) + // You should use pushStack() in order to do this, but maintain the stack + setArray: function( elems ) { + // Resetting the length to 0, then using the native Array push + // is a super-fast way to populate an object with array-like properties + this.length = 0; + Array.prototype.push.apply( this, elems ); + + return this; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem && elem.jquery ? elem[0] : elem + , this ); + }, + + attr: function( name, value, type ) { + var options = name; + + // Look for the case where we're accessing a style value + if ( typeof name === "string" ) + if ( value === undefined ) + return this[0] && jQuery[ type || "attr" ]( this[0], name ); + + else { + options = {}; + options[ name ] = value; + } + + // Check to see if we're setting style values + return this.each(function(i){ + // Set all the styles + for ( name in options ) + jQuery.attr( + type ? + this.style : + this, + name, jQuery.prop( this, options[ name ], type, i, name ) + ); + }); + }, + + css: function( key, value ) { + // ignore negative width and height values + if ( (key == 'width' || key == 'height') && parseFloat(value) < 0 ) + value = undefined; + return this.attr( key, value, "curCSS" ); + }, + + text: function( text ) { + if ( typeof text !== "object" && text != null ) + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + + var ret = ""; + + jQuery.each( text || this, function(){ + jQuery.each( this.childNodes, function(){ + if ( this.nodeType != 8 ) + ret += this.nodeType != 1 ? 
+ this.nodeValue : + jQuery.fn.text( [ this ] ); + }); + }); + + return ret; + }, + + wrapAll: function( html ) { + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).clone(); + + if ( this[0].parentNode ) + wrap.insertBefore( this[0] ); + + wrap.map(function(){ + var elem = this; + + while ( elem.firstChild ) + elem = elem.firstChild; + + return elem; + }).append(this); + } + + return this; + }, + + wrapInner: function( html ) { + return this.each(function(){ + jQuery( this ).contents().wrapAll( html ); + }); + }, + + wrap: function( html ) { + return this.each(function(){ + jQuery( this ).wrapAll( html ); + }); + }, + + append: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.appendChild( elem ); + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function(elem){ + if (this.nodeType == 1) + this.insertBefore( elem, this.firstChild ); + }); + }, + + before: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this ); + }); + }, + + after: function() { + return this.domManip(arguments, false, function(elem){ + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + }, + + end: function() { + return this.prevObject || jQuery( [] ); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: [].push, + sort: [].sort, + splice: [].splice, + + find: function( selector ) { + if ( this.length === 1 ) { + var ret = this.pushStack( [], "find", selector ); + ret.length = 0; + jQuery.find( selector, this[0], ret ); + return ret; + } else { + return this.pushStack( jQuery.unique(jQuery.map(this, function(elem){ + return jQuery.find( selector, elem ); + })), "find", selector ); + } + }, + + clone: function( events ) { + // Do the clone + var ret = this.map(function(){ + if ( !jQuery.support.noCloneEvent && !jQuery.isXMLDoc(this) ) { + // IE copies events bound via attachEvent when + // using cloneNode. Calling detachEvent on the + // clone will also remove the events from the orignal + // In order to get around this, we use innerHTML. + // Unfortunately, this means some modifications to + // attributes in IE that are actually only stored + // as properties will not be copied (such as the + // the name attribute on an input). 
+ var html = this.outerHTML; + if ( !html ) { + var div = this.ownerDocument.createElement("div"); + div.appendChild( this.cloneNode(true) ); + html = div.innerHTML; + } + + return jQuery.clean([html.replace(/ jQuery\d+="(?:\d+|null)"/g, "").replace(/^\s*/, "")])[0]; + } else + return this.cloneNode(true); + }); + + // Copy the events from the original to the clone + if ( events === true ) { + var orig = this.find("*").andSelf(), i = 0; + + ret.find("*").andSelf().each(function(){ + if ( this.nodeName !== orig[i].nodeName ) + return; + + var events = jQuery.data( orig[i], "events" ); + + for ( var type in events ) { + for ( var handler in events[ type ] ) { + jQuery.event.add( this, type, events[ type ][ handler ], events[ type ][ handler ].data ); + } + } + + i++; + }); + } + + // Return the cloned set + return ret; + }, + + filter: function( selector ) { + return this.pushStack( + jQuery.isFunction( selector ) && + jQuery.grep(this, function(elem, i){ + return selector.call( elem, i ); + }) || + + jQuery.multiFilter( selector, jQuery.grep(this, function(elem){ + return elem.nodeType === 1; + }) ), "filter", selector ); + }, + + closest: function( selector ) { + var pos = jQuery.expr.match.POS.test( selector ) ? jQuery(selector) : null, + closer = 0; + + return this.map(function(){ + var cur = this; + while ( cur && cur.ownerDocument ) { + if ( pos ? pos.index(cur) > -1 : jQuery(cur).is(selector) ) { + jQuery.data(cur, "closest", closer); + return cur; + } + cur = cur.parentNode; + closer++; + } + }); + }, + + not: function( selector ) { + if ( typeof selector === "string" ) + // test special case where just one selector is passed in + if ( isSimple.test( selector ) ) + return this.pushStack( jQuery.multiFilter( selector, this, true ), "not", selector ); + else + selector = jQuery.multiFilter( selector, this ); + + var isArrayLike = selector.length && selector[selector.length - 1] !== undefined && !selector.nodeType; + return this.filter(function() { + return isArrayLike ? jQuery.inArray( this, selector ) < 0 : this != selector; + }); + }, + + add: function( selector ) { + return this.pushStack( jQuery.unique( jQuery.merge( + this.get(), + typeof selector === "string" ? + jQuery( selector ) : + jQuery.makeArray( selector ) + ))); + }, + + is: function( selector ) { + return !!selector && jQuery.multiFilter( selector, this ).length > 0; + }, + + hasClass: function( selector ) { + return !!selector && this.is( "." + selector ); + }, + + val: function( value ) { + if ( value === undefined ) { + var elem = this[0]; + + if ( elem ) { + if( jQuery.nodeName( elem, 'option' ) ) + return (elem.attributes.value || {}).specified ? elem.value : elem.text; + + // We need to handle select boxes special + if ( jQuery.nodeName( elem, "select" ) ) { + var index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type == "select-one"; + + // Nothing was selected + if ( index < 0 ) + return null; + + // Loop through all the selected options + for ( var i = one ? index : 0, max = one ? 
index + 1 : options.length; i < max; i++ ) { + var option = options[ i ]; + + if ( option.selected ) { + // Get the specifc value for the option + value = jQuery(option).val(); + + // We don't need an array for one selects + if ( one ) + return value; + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + } + + // Everything else, we just grab the value + return (elem.value || "").replace(/\r/g, ""); + + } + + return undefined; + } + + if ( typeof value === "number" ) + value += ''; + + return this.each(function(){ + if ( this.nodeType != 1 ) + return; + + if ( jQuery.isArray(value) && /radio|checkbox/.test( this.type ) ) + this.checked = (jQuery.inArray(this.value, value) >= 0 || + jQuery.inArray(this.name, value) >= 0); + + else if ( jQuery.nodeName( this, "select" ) ) { + var values = jQuery.makeArray(value); + + jQuery( "option", this ).each(function(){ + this.selected = (jQuery.inArray( this.value, values ) >= 0 || + jQuery.inArray( this.text, values ) >= 0); + }); + + if ( !values.length ) + this.selectedIndex = -1; + + } else + this.value = value; + }); + }, + + html: function( value ) { + return value === undefined ? + (this[0] ? + this[0].innerHTML.replace(/ jQuery\d+="(?:\d+|null)"/g, "") : + null) : + this.empty().append( value ); + }, + + replaceWith: function( value ) { + return this.after( value ).remove(); + }, + + eq: function( i ) { + return this.slice( i, +i + 1 ); + }, + + slice: function() { + return this.pushStack( Array.prototype.slice.apply( this, arguments ), + "slice", Array.prototype.slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function(elem, i){ + return callback.call( elem, i, elem ); + })); + }, + + andSelf: function() { + return this.add( this.prevObject ); + }, + + domManip: function( args, table, callback ) { + if ( this[0] ) { + var fragment = (this[0].ownerDocument || this[0]).createDocumentFragment(), + scripts = jQuery.clean( args, (this[0].ownerDocument || this[0]), fragment ), + first = fragment.firstChild; + + if ( first ) + for ( var i = 0, l = this.length; i < l; i++ ) + callback.call( root(this[i], first), this.length > 1 || i > 0 ? + fragment.cloneNode(true) : fragment ); + + if ( scripts ) + jQuery.each( scripts, evalScript ); + } + + return this; + + function root( elem, cur ) { + return table && jQuery.nodeName(elem, "table") && jQuery.nodeName(cur, "tr") ? 
+ (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; + } + } +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +function evalScript( i, elem ) { + if ( elem.src ) + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + + else + jQuery.globalEval( elem.text || elem.textContent || elem.innerHTML || "" ); + + if ( elem.parentNode ) + elem.parentNode.removeChild( elem ); +} + +function now(){ + return +new Date; +} + +jQuery.extend = jQuery.fn.extend = function() { + // copy reference to target object + var target = arguments[0] || {}, i = 1, length = arguments.length, deep = false, options; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) + target = {}; + + // extend jQuery itself if only one argument is passed + if ( length == i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) + // Extend the base object + for ( var name in options ) { + var src = target[ name ], copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) + continue; + + // Recurse if we're merging object values + if ( deep && copy && typeof copy === "object" && !copy.nodeType ) + target[ name ] = jQuery.extend( deep, + // Never move original objects, clone them + src || ( copy.length != null ? [ ] : { } ) + , copy ); + + // Don't bring in undefined values + else if ( copy !== undefined ) + target[ name ] = copy; + + } + + // Return the modified object + return target; +}; + +// exclude the following css properties to add px +var exclude = /z-?index|font-?weight|opacity|zoom|line-?height/i, + // cache defaultView + defaultView = document.defaultView || {}, + toString = Object.prototype.toString; + +jQuery.extend({ + noConflict: function( deep ) { + window.$ = _$; + + if ( deep ) + window.jQuery = _jQuery; + + return jQuery; + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return toString.call(obj) === "[object Function]"; + }, + + isArray: function( obj ) { + return toString.call(obj) === "[object Array]"; + }, + + // check if an element is in a (or is an) XML document + isXMLDoc: function( elem ) { + return elem.nodeType === 9 && elem.documentElement.nodeName !== "HTML" || + !!elem.ownerDocument && jQuery.isXMLDoc( elem.ownerDocument ); + }, + + // Evalulates a script in a global context + globalEval: function( data ) { + if ( data && /\S/.test(data) ) { + // Inspired by code by Andrea Giammarchi + // http://webreflection.blogspot.com/2007/08/global-scope-evaluation-and-dom.html + var head = document.getElementsByTagName("head")[0] || document.documentElement, + script = document.createElement("script"); + + script.type = "text/javascript"; + if ( jQuery.support.scriptEval ) + script.appendChild( document.createTextNode( data ) ); + else + script.text = data; + + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709). 
+ head.insertBefore( script, head.firstChild ); + head.removeChild( script ); + } + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() == name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, length = object.length; + + if ( args ) { + if ( length === undefined ) { + for ( name in object ) + if ( callback.apply( object[ name ], args ) === false ) + break; + } else + for ( ; i < length; ) + if ( callback.apply( object[ i++ ], args ) === false ) + break; + + // A special, fast, case for the most common use of each + } else { + if ( length === undefined ) { + for ( name in object ) + if ( callback.call( object[ name ], name, object[ name ] ) === false ) + break; + } else + for ( var value = object[0]; + i < length && callback.call( value, i, value ) !== false; value = object[++i] ){} + } + + return object; + }, + + prop: function( elem, value, type, i, name ) { + // Handle executable functions + if ( jQuery.isFunction( value ) ) + value = value.call( elem, i ); + + // Handle passing in a number to a CSS property + return typeof value === "number" && type == "curCSS" && !exclude.test( name ) ? + value + "px" : + value; + }, + + className: { + // internal only, use addClass("class") + add: function( elem, classNames ) { + jQuery.each((classNames || "").split(/\s+/), function(i, className){ + if ( elem.nodeType == 1 && !jQuery.className.has( elem.className, className ) ) + elem.className += (elem.className ? " " : "") + className; + }); + }, + + // internal only, use removeClass("class") + remove: function( elem, classNames ) { + if (elem.nodeType == 1) + elem.className = classNames !== undefined ? + jQuery.grep(elem.className.split(/\s+/), function(className){ + return !jQuery.className.has( classNames, className ); + }).join(" ") : + ""; + }, + + // internal only, use hasClass("class") + has: function( elem, className ) { + return elem && jQuery.inArray( className, (elem.className || elem).toString().split(/\s+/) ) > -1; + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( var name in options ) + elem.style[ name ] = old[ name ]; + }, + + css: function( elem, name, force, extra ) { + if ( name == "width" || name == "height" ) { + var val, props = { position: "absolute", visibility: "hidden", display:"block" }, which = name == "width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ]; + + function getWH() { + val = name == "width" ? 
elem.offsetWidth : elem.offsetHeight; + + if ( extra === "border" ) + return; + + jQuery.each( which, function() { + if ( !extra ) + val -= parseFloat(jQuery.curCSS( elem, "padding" + this, true)) || 0; + if ( extra === "margin" ) + val += parseFloat(jQuery.curCSS( elem, "margin" + this, true)) || 0; + else + val -= parseFloat(jQuery.curCSS( elem, "border" + this + "Width", true)) || 0; + }); + } + + if ( elem.offsetWidth !== 0 ) + getWH(); + else + jQuery.swap( elem, props, getWH ); + + return Math.max(0, Math.round(val)); + } + + return jQuery.curCSS( elem, name, force ); + }, + + curCSS: function( elem, name, force ) { + var ret, style = elem.style; + + // We need to handle opacity special in IE + if ( name == "opacity" && !jQuery.support.opacity ) { + ret = jQuery.attr( style, "opacity" ); + + return ret == "" ? + "1" : + ret; + } + + // Make sure we're using the right name for getting the float value + if ( name.match( /float/i ) ) + name = styleFloat; + + if ( !force && style && style[ name ] ) + ret = style[ name ]; + + else if ( defaultView.getComputedStyle ) { + + // Only "float" is needed here + if ( name.match( /float/i ) ) + name = "float"; + + name = name.replace( /([A-Z])/g, "-$1" ).toLowerCase(); + + var computedStyle = defaultView.getComputedStyle( elem, null ); + + if ( computedStyle ) + ret = computedStyle.getPropertyValue( name ); + + // We should always get a number back from opacity + if ( name == "opacity" && ret == "" ) + ret = "1"; + + } else if ( elem.currentStyle ) { + var camelCase = name.replace(/\-(\w)/g, function(all, letter){ + return letter.toUpperCase(); + }); + + ret = elem.currentStyle[ name ] || elem.currentStyle[ camelCase ]; + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !/^\d+(px)?$/i.test( ret ) && /^\d/.test( ret ) ) { + // Remember the original values + var left = style.left, rsLeft = elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + elem.runtimeStyle.left = elem.currentStyle.left; + style.left = ret || 0; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + elem.runtimeStyle.left = rsLeft; + } + } + + return ret; + }, + + clean: function( elems, context, fragment ) { + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + if ( !fragment && elems.length === 1 && typeof elems[0] === "string" ) { + var match = /^<(\w+)\s*\/?>$/.exec(elems[0]); + if ( match ) + return [ context.createElement( match[1] ) ]; + } + + var ret = [], scripts = [], div = context.createElement("div"); + + jQuery.each(elems, function(i, elem){ + if ( typeof elem === "number" ) + elem += ''; + + if ( !elem ) + return; + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(/(<(\w+)[^>]*?)\/>/g, function(all, front, tag){ + return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i) ? 
+					all :
+					front + "></" + tag + ">";
+			});
+
+			// Trim whitespace, otherwise indexOf won't work as expected
+			var tags = elem.replace(/^\s+/, "").substring(0, 10).toLowerCase();
+
+			var wrap =
+				// option or optgroup
+				!tags.indexOf("<opt") &&
+				[ 1, "<select multiple='multiple'>", "</select>" ] ||
+
+				!tags.indexOf("<leg") &&
+				[ 1, "<fieldset>", "</fieldset>" ] ||
+
+				tags.match(/^<(thead|tbody|tfoot|colg|cap)/) &&
+				[ 1, "<table>", "</table>" ] ||
+
+				!tags.indexOf("<tr") &&
+				[ 2, "<table><tbody>", "</tbody></table>" ] ||
+
+				// <thead> matched above
+				(!tags.indexOf("<td") || !tags.indexOf("<th")) &&
+				[ 3, "<table><tbody><tr>", "</tr></tbody></table>" ] ||
+
+				!tags.indexOf("<col") &&
+				[ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ] ||
+
+				// IE can't serialize <link> and <script> tags normally
+				!jQuery.support.htmlSerialize &&
+				[ 1, "div<div>", "</div>" ] ||
+
+				[ 0, "", "" ];

      +

      + FanFictionDownLoader +

      + +
      + + +
      + + {% if fic.failure %} +
      + {{ fic.failure }} +
      + {% endif %} +
      + + +
      + + {% if is_login %} + +

      Login and Password

      +
+ {{ site }} requires a login/password for this story. Provide your
+ {{ site }} login/password below to download it.
+
      +
      +
      Login
      +
      +
      + +
      +
      Password
      +
      +
      + + {% else %} + + + +
      +
      Are you an Adult?
      +
      + + {% endif %} + +
      + +
      + +
      +
      + +
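The is_login and adult-check branches in this template are selected by the exception type caught in FanfictionDownloader.post() in main.py, shown further down in this patch. A condensed sketch of that dispatch; the helper name and the 'adult_check' key are illustrative, not the handler's own:

from fanficdownloader import exceptions

# Condensed from FanfictionDownloader.post() in main.py below.
def template_flags(e):
    flags = {'is_login': isinstance(e, exceptions.FailedToLogin)}
    if isinstance(e, exceptions.AdultCheckRequired):
        # thewriterscoffeeshop.com can need adult check *and* login,
        # so main.py also passes the credentials back through.
        flags['adult_check'] = True
    return flags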
      + Powered by Google App Engine +

      + This is a web front-end to FanFictionDownLoader
      + Copyright © FanFictionDownLoader team +
      + +
      + + +
      +
      + + diff --git a/main.py b/main.py new file mode 100644 index 00000000..ae3b9757 --- /dev/null +++ b/main.py @@ -0,0 +1,621 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +logging.getLogger().setLevel(logging.DEBUG) + +import os +from os.path import dirname, basename, normpath +import re +import sys +import zlib +import urllib +import datetime + +import traceback +from StringIO import StringIO +import ConfigParser + +## Just to shut up the appengine warning about "You are using the +## default Django version (0.96). The default Django version will +## change in an App Engine release in the near future. Please call +## use_library() to explicitly select a Django version. For more +## information see +## http://code.google.com/appengine/docs/python/tools/libraries.html#Django" +## Note that if you are using the SDK App Engine Launcher and hit an SDK +## Console page first, you will get a django version mismatch error when you +## to go hit one of the application pages. Just change a file again, and +## make sure to hit an app page before the SDK page to clear it. +#os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' +#from google.appengine.dist import use_library +#use_library('django', '1.2') + +from google.appengine.ext import db +from google.appengine.api import taskqueue +from google.appengine.api import users +#from google.appengine.ext import webapp +import webapp2 +from google.appengine.ext.webapp import template +#from google.appengine.ext.webapp2 import util +from google.appengine.runtime import DeadlineExceededError + +from ffstorage import * + +from fanficdownloader import adapters, writers, exceptions + +class UserConfigServer(webapp2.RequestHandler): + def getUserConfig(self,user): + config = ConfigParser.SafeConfigParser() + + logging.debug('reading defaults.ini config file') + config.read('defaults.ini') + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l and l[0].config: + uconfig=l[0] + #logging.debug('reading config from UserConfig(%s)'%uconfig.config) + config.readfp(StringIO(uconfig.config)) + + return config + +class MainHandler(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if user: + error = self.request.get('error') + template_values = {'nickname' : user.nickname(), 'authorized': True} + url = self.request.get('url') + template_values['url'] = url + + if error: + if error == 'login_required': + template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.' 
+ elif error == 'bad_url': + template_values['error_message'] = 'Unsupported URL: ' + url + elif error == 'custom': + template_values['error_message'] = 'Error happened: ' + self.request.get('errtext') + elif error == 'configsaved': + template_values['error_message'] = 'Configuration Saved' + elif error == 'recentcleared': + template_values['error_message'] = 'Your Recent Downloads List has been Cleared' + + filename = self.request.get('file') + if len(filename) > 1: + template_values['yourfile'] = '''''' % (filename, self.request.get('name'), self.request.get('author')) + + self.response.headers['Content-Type'] = 'text/html' + path = os.path.join(os.path.dirname(__file__), 'index.html') + + self.response.out.write(template.render(path, template_values)) + else: + logging.debug(users.create_login_url('/')) + url = users.create_login_url(self.request.uri) + template_values = {'login_url' : url, 'authorized': False} + path = os.path.join(os.path.dirname(__file__), 'index.html') + self.response.out.write(template.render(path, template_values)) + + +class EditConfigServer(UserConfigServer): + def get(self): + self.post() + + def post(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + template_values = {'nickname' : user.nickname(), 'authorized': True} + + ## Pull user's config record. + l = UserConfig.all().filter('user =', user).fetch(1) + if l: + uconfig=l[0] + else: + uconfig=None + + if self.request.get('update'): + if uconfig is None: + uconfig = UserConfig() + uconfig.user = user + uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case. + uconfig.put() + try: + config = self.getUserConfig(user) + self.redirect("/?error=configsaved") + except Exception, e: + logging.info("Saved Config Failed:%s"%e) + self.redirect("/?error=custom&errtext=%s"%urlEscape(str(e))) + else: # not update, assume display for edit + if uconfig is not None and uconfig.config: + config = uconfig.config + else: + configfile = open("example.ini","rb") + config = configfile.read() + configfile.close() + template_values['config'] = config + + configfile = open("defaults.ini","rb") + config = configfile.read() + configfile.close() + template_values['defaultsini'] = config + + path = os.path.join(os.path.dirname(__file__), 'editconfig.html') + self.response.headers['Content-Type'] = 'text/html' + self.response.out.write(template.render(path, template_values)) + + +class FileServer(webapp2.RequestHandler): + + def get(self): + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + return + + try: + download = getDownloadMeta(id=fileId) + + name = download.name.encode('utf-8') + + logging.info("Serving file: %s" % name) + + if name.endswith('.epub'): + self.response.headers['Content-Type'] = 'application/epub+zip' + elif name.endswith('.html'): + self.response.headers['Content-Type'] = 'text/html' + elif name.endswith('.txt'): + self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.mobi'): + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' + elif name.endswith('.zip'): + self.response.headers['Content-Type'] = 'application/zip' + else: + self.response.headers['Content-Type'] = 'application/octet-stream' + + self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name + + data = DownloadData.all().filter("download =", download).order("index") + # epubs are all already compressed. 
+ # Each chunk is compress individually to avoid having + # to hold the whole in memory just for the + # compress/uncompress + if download.format != 'epub': + def dc(data): + try: + return zlib.decompress(data) + # if error, assume it's a chunk from before we started compessing. + except zlib.error: + return data + else: + def dc(data): + return data + + for datum in data: + self.response.out.write(dc(datum.blob)) + + except Exception, e: + fic = DownloadMeta() + fic.failure = unicode(e) + + template_values = dict(fic = fic, + #nickname = user.nickname(), + #escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class FileStatusServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + escaped_url=False + + try: + download = getDownloadMeta(id=fileId) + + if download: + logging.info("Status url: %s" % download.url) + if download.completed and download.format=='epub': + escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format) + else: + download = DownloadMeta() + download.failure = "Download not found" + + except Exception, e: + download = DownloadMeta() + download.failure = unicode(e) + + template_values = dict(fic = download, + nickname = user.nickname(), + escaped_url = escaped_url + ) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + +class ClearRecentServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + logging.info("Clearing Recent List for user: "+user.nickname()) + q = DownloadMeta.all() + q.filter('user =', user) + num=0 + while( True ): + results = q.fetch(100) + if results: + for d in results: + d.delete() + for c in d.data_chunks: + c.delete() + num = num + 1 + logging.debug('Delete '+d.url) + else: + break + logging.info('Deleted %d instances download.' 
% num) + self.redirect("/?error=recentcleared") + +class RecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = DownloadMeta.all() + q.filter('user =', user).order('-date') + fics = q.fetch(100) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + for fic in fics: + if fic.completed and fic.format == 'epub': + fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format) + + template_values = dict(fics = fics, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'recent.html') + self.response.out.write(template.render(path, template_values)) + +class AllRecentFilesServer(webapp2.RequestHandler): + def get(self): + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + q = SavedMeta.all() + if self.request.get('bydate'): + q.order('-date') + else: + q.order('-count') + + fics = q.fetch(200) + logging.info("Recent fetched %d downloads for user %s."%(len(fics),user.nickname())) + + sendslugs = [] + + for fic in fics: + ficslug = FicSlug(fic) + sendslugs.append(ficslug) + + template_values = dict(fics = sendslugs, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'allrecent.html') + self.response.out.write(template.render(path, template_values)) + +class FicSlug(): + def __init__(self,savedmeta): + self.url = savedmeta.url + self.count = savedmeta.count + for k, v in savedmeta.meta.iteritems(): + setattr(self,k,v) + +class FanfictionDownloader(UserConfigServer): + def get(self): + self.post() + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + user = users.get_current_user() + if not user: + self.redirect(users.create_login_url(self.request.uri)) + return + + format = self.request.get('format') + url = self.request.get('url') + + if not url or url.strip() == "": + self.redirect('/') + return + + logging.info("Queuing Download: %s" % url) + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') == "on" + + # use existing record if available. Fetched/Created before + # the adapter can normalize the URL in case we need to record + # an exception. + download = getDownloadMeta(url=url,user=user,format=format,new=True) + + adapter = None + try: + try: + config = self.getUserConfig(user) + except Exception, e: + self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e))) + return + + adapter = adapters.getAdapter(config,url,format) + logging.info('Created an adaper: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + ## This scrapes the metadata, which will be + ## duplicated in the queue task, but it + ## detects bad URLs, bad login, bad story, etc + ## without waiting for the queue. So I think + ## it's worth the double up. Could maybe save + ## it all in the download object someday. + story = adapter.getStoryMetadataOnly() + + ## Fetch again using normalized story URL. The one + ## fetched/created above, if different, will not be saved. 
+ download = getDownloadMeta(url=story.getMetadata('storyUrl'), + user=user,format=format,new=True) + + download.title = story.getMetadata('title') + download.author = story.getMetadata('author') + download.url = story.getMetadata('storyUrl') + download.put() + + taskqueue.add(url='/fdowntask', + queue_name="download", + params={'id':str(download.key()), + 'format':format, + 'url':download.url, + 'login':login, + 'password':password, + 'user':user.email(), + 'is_adult':is_adult}) + + logging.info("enqueued download key: " + str(download.key())) + + except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: + download.failure = unicode(e) + download.put() + logging.info(unicode(e)) + is_login= ( isinstance(e, exceptions.FailedToLogin) ) + template_values = dict(nickname = user.nickname(), + url = url, + format = format, + site = adapter.getConfigSection(), + fic = download, + is_login=is_login, + ) + # thewriterscoffeeshop.com can do adult check *and* user required. + if isinstance(e,exceptions.AdultCheckRequired): + template_values['login']=login + template_values['password']=password + + path = os.path.join(os.path.dirname(__file__), 'login.html') + self.response.out.write(template.render(path, template_values)) + return + except (exceptions.InvalidStoryURL,exceptions.UnknownSite,exceptions.StoryDoesNotExist), e: + logging.warn(unicode(e)) + download.failure = unicode(e) + download.put() + except Exception, e: + logging.error("Failure Queuing Download: url:%s" % url) + logging.exception(e) + download.failure = unicode(e) + download.put() + + self.redirect('/status?id='+str(download.key())) + + return + + +class FanfictionDownloaderTask(UserConfigServer): + + def post(self): + logging.getLogger().setLevel(logging.DEBUG) + fileId = self.request.get('id') + # User object can't pass, just email address + user = users.User(self.request.get('user')) + format = self.request.get('format') + url = self.request.get('url') + login = self.request.get('login') + password = self.request.get('password') + is_adult = self.request.get('is_adult') + + logging.info("Downloading: " + url + " for user: "+user.nickname()) + logging.info("ID: " + fileId) + + adapter = None + writerClass = None + + # use existing record if available. + # fileId should have record from /fdown. + download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True) + for c in download.data_chunks: + c.delete() + download.put() + + logging.info('Creating adapter...') + + try: + config = self.getUserConfig(user) + adapter = adapters.getAdapter(config,url,format) + + logging.info('Created an adapter: %s' % adapter) + + if len(login) > 1: + adapter.username=login + adapter.password=password + adapter.is_adult=is_adult + + # adapter.getStory() is what does all the heavy lifting. + # adapter.getStoryMetadataOnly() only fetches enough to + # get metadata. writer.writeStory() will call + # adapter.getStory(), too. 
+ writer = writers.getWriter(format,config,adapter) + download.name = writer.getOutputFileName() + #logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.url = adapter.getStory().getMetadata('storyUrl') + download.put() + + allmeta = adapter.getStory().getAllMetadata(removeallentities=True,doreplacements=False) + + outbuffer = StringIO() + writer.writeStory(outbuffer) + data = outbuffer.getvalue() + outbuffer.close() + del outbuffer + #del writer.adapter + #del writer.story + del writer + #del adapter.story + del adapter + + # epubs are all already compressed. Each chunk is + # compressed individually to avoid having to hold the + # whole in memory just for the compress/uncompress. + if format != 'epub': + def c(data): + return zlib.compress(data) + else: + def c(data): + return data + + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=c(data[:1000000])).put() + index += 1 + data = data[1000000:] + download.completed=True + download.put() + + smetal = SavedMeta.all().filter('url =', allmeta['storyUrl'] ).fetch(1) + if smetal and smetal[0]: + smeta = smetal[0] + smeta.count += 1 + else: + smeta=SavedMeta() + smeta.count = 1 + + smeta.url = allmeta['storyUrl'] + smeta.title = allmeta['title'] + smeta.author = allmeta['author'] + smeta.meta = allmeta + smeta.date = datetime.datetime.now() + smeta.put() + + logging.info("Download finished OK") + del data + + except Exception, e: + logging.exception(e) + download.failure = unicode(e) + download.put() + return + + return + +def getDownloadMeta(id=None,url=None,user=None,format=None,new=False): + ## try to get download rec from passed id first. then fall back + ## to user/url/format + download = None + if id: + try: + download = db.get(db.Key(id)) + logging.info("DownloadMeta found by ID:"+id) + except: + pass + + if not download and url and user and format: + try: + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is not None and len(q) > 0 ): + logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format)) + download = q[0] + except: + pass + + if new: + # NOT clearing existing chunks here, because this record may + # never be saved. 
+ if not download: + logging.debug("New DownloadMeta") + download = DownloadMeta() + + download.completed=False + download.failure=None + download.date=datetime.datetime.now() + + download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID']) + if user: + download.user = user + if url: + download.url = url + if format: + download.format = format + + return download + +def toPercentDecimal(match): + "Return the %decimal number for the character for url escaping" + s = match.group(1) + return "%%%02x" % ord(s) + +def urlEscape(data): + "Escape text, including unicode, for use in URLs" + p = re.compile(r'([^\w])') + return p.sub(toPercentDecimal, data.encode("utf-8")) + +logging.getLogger().setLevel(logging.DEBUG) +app = webapp2.WSGIApplication([('/', MainHandler), + ('/fdowntask', FanfictionDownloaderTask), + ('/fdown', FanfictionDownloader), + (r'/file.*', FileServer), + ('/status', FileStatusServer), + ('/allrecent', AllRecentFilesServer), + ('/recent', RecentFilesServer), + ('/editconfig', EditConfigServer), + ('/clearrecent', ClearRecentServer), + ], + debug=False) diff --git a/makeplugin.py b/makeplugin.py new file mode 100644 index 00000000..e4abac41 --- /dev/null +++ b/makeplugin.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from glob import glob + +from makezip import createZipFile + +if __name__=="__main__": + filename="FanFictionDownLoader.zip" + exclude=['*.pyc','*~','*.xcf'] + # from top dir. 'w' for overwrite + createZipFile(filename,"w", + ['plugin-defaults.ini','plugin-example.ini','epubmerge.py','fanficdownloader'], + exclude=exclude) + #from calibre-plugin dir. 'a' for append + os.chdir('calibre-plugin') + files=['about.txt','images',] + files.extend(glob('*.py')) + files.extend(glob('plugin-import-name-*.txt')) + createZipFile("../"+filename,"a", + files,exclude=exclude) diff --git a/makezip.py b/makezip.py new file mode 100644 index 00000000..55a10197 --- /dev/null +++ b/makezip.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# epubmerge.py 1.0 + +# Copyright 2011, Jim Miller + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
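+# A minimal usage sketch (this mirrors the calls in makeplugin.py above;
+# 'w' truncates the zip, 'a' appends, and exclude patterns are globs):
+#
+#   from makezip import createZipFile
+#   createZipFile("FanFictionDownLoader.zip", "w",
+#                 ['plugin-defaults.ini', 'fanficdownloader'],
+#                 exclude=['*.pyc', '*~'])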
+
+import os, zipfile, sys
+from glob import glob
+
+def addFolderToZip(myZipFile,folder,exclude=[]):
+    folder = folder.encode('ascii') # convert path to ascii for ZipFile method
+    excludelist=[]
+    for ex in exclude:
+        excludelist.extend(glob(folder+"/"+ex))
+    for file in glob(folder+"/*"):
+        if file in excludelist:
+            continue
+        if os.path.isfile(file):
+            #print file
+            myZipFile.write(file, file, zipfile.ZIP_DEFLATED)
+        elif os.path.isdir(file):
+            addFolderToZip(myZipFile,file,exclude=exclude)
+
+def createZipFile(filename,mode,files,exclude=[]):
+    myZipFile = zipfile.ZipFile( filename, mode ) # open the zip file for writing
+    excludelist=[]
+    for ex in exclude:
+        excludelist.extend(glob(ex))
+    for file in files:
+        if file in excludelist:
+            continue
+        file = file.encode('ascii') # convert path to ascii for ZipFile method
+        if os.path.isfile(file):
+            # top-level files go into the zip root under their base name;
+            # don't shadow the zip's own filename while doing it.
+            (filepath, arcname) = os.path.split(file)
+            #print file
+            myZipFile.write( file, arcname, zipfile.ZIP_DEFLATED )
+        if os.path.isdir(file):
+            addFolderToZip(myZipFile,file,exclude=exclude)
+    myZipFile.close()
+    return (1,filename)
+
diff --git a/plugin-defaults.ini b/plugin-defaults.ini
new file mode 100644
index 00000000..05961eed
--- /dev/null
+++ b/plugin-defaults.ini
@@ -0,0 +1,502 @@
+# Copyright 2012 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[defaults]
+
+## [defaults] section applies to all formats and sites but may be
+## overridden at several levels
+
+## Some sites also require the user to confirm they are adult for
+## adult content. In commandline version, this should go in your
+## personal.ini, not defaults.ini.
+#is_adult:true
+
+## All available titlepage_entries and the label used for them:
+## _label:
-      New Site
+      New Sites

-      Support for the Wonderful World of MakeBelieve(WWOMB) archive at
-http://www.squidge.org/peja/cgi-bin/index.php
-has been added. This does not support other sections of
-www.squidge.org, or the other files under www.squidge.org/peja that
-aren't in the eFiction instance.
+      We now support www.wraithbait.com and
+      www.libraryofmoria.com, thanks to Ida for adding these!
+
+      Support for the Wonderful World of MakeBelieve(WWOMB)
+      archive at
+      http://www.squidge.org/peja/cgi-bin/index.php
+      has also been added. This does not support other sections of
+      www.squidge.org, or the other files under www.squidge.org/peja that
+      aren't in the eFiction instance.

      Questions? Check out our
@@ -295,10 +300,21 @@ aren't in the eFiction instance.
      Use the URL of the story's summary, such as
      http://archive.skyehawke.com/story.php?no=17466.
+
+      www.libraryofmoria.com
+
+      Use the URL of the story's summary, such as
+      http://www.libraryofmoria.com/a/viewstory.php?sid=434.
+
+      www.wraithbait.com
+
+      Use the URL of the story's summary, such as
+      http://www.wraithbait.com/viewstory.php?sid=14305.
+
      www.squidge.org/peja (WWOMB)
      Use the URL of the story's summary, such as
-      http://www.squidge.org/peja/cgi-bin/viewstory.php?sid=1234.
+      http://www.squidge.org/peja/cgi-bin/viewstory.php?sid=1234.
+      This is only for squidge.org/peja, not other parts of squidge.org.

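The plugin-defaults.ini hunk below layers a per-site section such as
[www.squidge.org/peja], and a per-site-per-format section such as
[www.squidge.org/peja:txt], over [defaults]. A minimal sketch of that
lookup order, assuming plain ConfigParser semantics rather than the
real Configuration class:

    import ConfigParser

    def get_setting(ini_path, site, fileform, key):
        # Most specific section wins: [site:format], then [site],
        # then [defaults].
        cp = ConfigParser.SafeConfigParser()
        cp.read(ini_path)
        for section in ("%s:%s" % (site, fileform), site, "defaults"):
            if cp.has_section(section) and cp.has_option(section, key):
                return cp.get(section, key)
        return None

    print get_setting("plugin-defaults.ini", "www.squidge.org/peja",
                      "txt", "titlepage_entries")
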
      diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 05961eed..69ae1cf3 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -413,6 +413,12 @@ extratags: ## this should go in your personal.ini, not defaults.ini. #is_adult:true +[www.libraryofmoria.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + [www.mediaminer.org] [www.midnightwhispers.ca] @@ -429,6 +435,17 @@ cover_exclusion_regexp:/stories/999/images/.*?_trophy.png [www.siye.co.uk] +[www.squidge.org/peja] +# www.squidge.org/peja calls it Fandom +category_label:Fandom +# Remove numWords -- www.squidge.org/peja word counts are inaccurate +titlepage_entries: series,category,genre,language,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,site,description + +[www.squidge.org/peja:txt] +## Add URLs since there aren't links. +# Remove numWords -- www.squidge.org/peja word counts are inaccurate +titlepage_entries: series,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,site,storyUrl, authorUrl, description + [www.thewriterscoffeeshop.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -482,16 +499,11 @@ collect_series: false [www.whofic.com] -[www.squidge.org/peja] -# www.squidge.org/peja calls it Fandom -category_label:Fandom -# Remove numWords -- www.squidge.org/peja word counts are inaccurate -titlepage_entries: series,category,genre,language,characters,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,site,description - -[www.squidge.org/peja:txt] -## Add URLs since there aren't links. -# Remove numWords -- www.squidge.org/peja word counts are inaccurate -titlepage_entries: series,category,genre,language,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,site,storyUrl, authorUrl, description +[www.wraithbait.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true [overrides] ## It may sometimes be useful to override all of the specific format, From 6413c52584c2732088e108ebf8d76197ea1588dd Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 12 May 2012 18:48:40 -0500 Subject: [PATCH 461/482] Added tag calibre-plugin-1.5.20 for changeset 5b585e031b72 From c4bbf3f0721f9515cb6611270bd64cb65f8d5822 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 12 May 2012 18:49:02 -0500 Subject: [PATCH 462/482] Added tag FanFictionDownLoader-4.4.9 for changeset 5b585e031b72 From 5072572f1c523dc38dc0c879b8ebd05d527f48a3 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 18 May 2012 12:11:16 -0500 Subject: [PATCH 463/482] Plugin only, optionally search epub text for story URL, option to GC only new. 
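For the "search epub text for story URL" option, the new
epubutils.get_story_url_from_html() (added at the end of this patch)
walks the OPF manifest and returns the first http link that the
caller's validity callback accepts. A stripped-down sketch of the same
idea, assuming a plain regex scan over the archive members is
acceptable:

    import re
    import zipfile

    def find_first_story_url(epub_path):
        # Scan each (x)html member of the EPUB for the first http(s) href.
        z = zipfile.ZipFile(epub_path, 'r')
        try:
            for name in z.namelist():
                if name.endswith(('.html', '.xhtml')):
                    text = z.read(name).decode('utf-8', 'ignore')
                    m = re.search(r'href="(https?://[^"]+)"', text)
                    if m:
                        return m.group(1)
        finally:
            z.close()
        return None

The real implementation also repairs the malformed fanfiction.net
links that ficsaver.com writes before testing them.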
--- calibre-plugin/__init__.py | 2 +- calibre-plugin/config.py | 16 +++++++++++++++ calibre-plugin/ffdl_plugin.py | 14 ++++++++----- fanficdownloader/epubutils.py | 37 +++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 44a7dda2..a9b76e38 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 20) + version = (1, 5, 21) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 336eb937..6f97fb3e 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -47,6 +47,7 @@ all_prefs.defaults['collision'] = OVERWRITE all_prefs.defaults['deleteotherforms'] = False all_prefs.defaults['adddialogstaysontop'] = False all_prefs.defaults['includeimages'] = False +all_prefs.defaults['lookforurlinhtml'] = False all_prefs.defaults['send_lists'] = '' all_prefs.defaults['read_lists'] = '' @@ -54,6 +55,7 @@ all_prefs.defaults['addtolists'] = False all_prefs.defaults['addtoreadlists'] = False all_prefs.defaults['addtolistsonread'] = False +all_prefs.defaults['gcnewonly'] = False all_prefs.defaults['gc_site_settings'] = {} all_prefs.defaults['allow_gc_from_ini'] = True @@ -72,6 +74,8 @@ copylist = ['personal.ini', 'deleteotherforms', 'adddialogstaysontop', 'includeimages', + 'lookforurlinhtml', + 'gcnewonly', 'gc_site_settings', 'allow_gc_from_ini'] @@ -176,6 +180,7 @@ class ConfigWidget(QWidget): prefs['deleteotherforms'] = self.basic_tab.deleteotherforms.isChecked() prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked() prefs['includeimages'] = self.basic_tab.includeimages.isChecked() + prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked() if self.readinglist_tab: # lists @@ -196,6 +201,7 @@ class ConfigWidget(QWidget): prefs['personal.ini'] = get_resources('plugin-example.ini') # Generate Covers tab + prefs['gcnewonly'] = self.generatecover_tab.gcnewonly.isChecked() gc_site_settings = {} for (site,combo) in self.generatecover_tab.gc_dropdowns.iteritems(): val = unicode(combo.itemData(combo.currentIndex()).toString()) @@ -309,6 +315,11 @@ class BasicTab(QWidget): self.includeimages.setChecked(prefs['includeimages']) self.l.addWidget(self.includeimages) + self.lookforurlinhtml = QCheckBox("Search EPUB text for Story URL?",self) + self.lookforurlinhtml.setToolTip("Look for first valid story URL inside EPUB text if not found in metadata.\nSomewhat risky, could find wrong URL depending on EPUB content.\nAlso finds and corrects bad ffnet URLs from ficsaver.com files.") + self.lookforurlinhtml.setChecked(prefs['lookforurlinhtml']) + self.l.addWidget(self.lookforurlinhtml) + self.l.insertStretch(-1) def set_collisions(self): @@ -511,6 +522,11 @@ class GenerateCoverTab(QWidget): horz.addWidget(dropdown) self.sl.addLayout(horz) + self.gcnewonly = QCheckBox("Run Generate Cover Only on New Books",self) + self.gcnewonly.setToolTip("Default is to run GC any time the calibre metadata is updated.") + self.gcnewonly.setChecked(prefs['gcnewonly']) + self.l.addWidget(self.gcnewonly) + self.allow_gc_from_ini = QCheckBox('Allow generate_cover_settings from personal.ini to override.',self) 
        self.allow_gc_from_ini.setToolTip("The INI parameter generate_cover_settings allows you to choose a GC setting based on metadata rather than site,\nbut it's much more complex. generate_cover_settings is ignored when this is off.")
        self.allow_gc_from_ini.setChecked(prefs['allow_gc_from_ini'])
diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py
index a2d2e811..c142f0ad 100644
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@@ -36,7 +36,7 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
 from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
 from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
-from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount
+from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount, get_story_url_from_html
 from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values)
 from calibre_plugins.fanfictiondownloader_plugin.dialogs import (
@@ -815,8 +815,8 @@ make_firstimage_cover:true
             db.commit()
-            if 'Generate Cover' in self.gui.iactions:
-
+            print("book['added']:%s"%book['added'])
+            if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):
                 gc_plugin = self.gui.iactions['Generate Cover']
                 setting_name = None
                 if prefs['allow_gc_from_ini']:
@@ -1018,8 +1018,12 @@ make_firstimage_cover:true
         if 'url' in identifiers:
             #print("url from epub:"+identifiers['url'].replace('|',':'))
             return identifiers['url'].replace('|',':')
-        # look for dc:source
-        return get_dcsource(existingepub)
+        # look for dc:source first, then scan the HTML if enabled.
+        link = get_dcsource(existingepub)
+        if link:
+            return link
+        elif prefs['lookforurlinhtml']:
+            return get_story_url_from_html(existingepub,self._is_good_downloader_url)
         return None

     def _is_good_downloader_url(self,url):
diff --git a/fanficdownloader/epubutils.py b/fanficdownloader/epubutils.py
index 8fcf30f1..845f3597 100644
--- a/fanficdownloader/epubutils.py
+++ b/fanficdownloader/epubutils.py
@@ -94,3 +94,40 @@ def get_path_part(n):
     if( len(relpath) > 0 ):
         relpath=relpath+"/"
     return relpath
+
+def get_story_url_from_html(inputio,_is_good_url=None):
+
+    #print("get_story_url_from_html called")
+    epub = ZipFile(inputio, 'r')
+
+    ## Find the .opf file.
+    container = epub.read("META-INF/container.xml")
+    containerdom = parseString(container)
+    rootfilenodelist = containerdom.getElementsByTagName("rootfile")
+    rootfilename = rootfilenodelist[0].getAttribute("full-path")
+
+    contentdom = parseString(epub.read(rootfilename))
+    #firstmetadom = contentdom.getElementsByTagName("metadata")[0]
+
+    ## Save the path to the .opf file--hrefs inside it are relative to it.
+    relpath = get_path_part(rootfilename)
+
+    # spin through the manifest--the only place there are item tags.
+    for item in contentdom.getElementsByTagName("item"):
+        # First, count the 'chapter' files. FFDL uses file0000.xhtml,
+        # but can also update epubs downloaded from Twisting the
+        # Hellmouth, which uses chapter0.html.
+        #print("---- item:%s"%item)
+        if( item.getAttribute("media-type") == "application/xhtml+xml" ):
+            filehref=relpath+item.getAttribute("href")
+            soup = bs.BeautifulSoup(epub.read(filehref).decode("utf-8"))
+            for link in soup.findAll('a',href=re.compile(r'^http.*')):
+                ahref=link['href']
+                #print("href:(%s)"%ahref)
+                # hack for bad ficsaver ffnet URLs.
+                m = re.match(r"^http://www.fanfiction.net/s(?P<id>\d+)//$",ahref)
+                if m != None:
+                    ahref="http://www.fanfiction.net/s/%s/1/"%m.group('id')
+                if _is_good_url == None or _is_good_url(ahref):
+                    return ahref
+    return None

From d87e79d37961cde5dd776f7754e17c0f63e2b95c Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Fri, 18 May 2012 12:11:32 -0500
Subject: [PATCH 464/482] Added tag calibre-plugin-1.5.21 for changeset
 831c272dde3a


From 73cefeb85864d84fd99fdb1777776bc463e02a17 Mon Sep 17 00:00:00 2001
From: Ida
Date: Fri, 18 May 2012 15:37:29 -0400
Subject: [PATCH 465/482] First version of Checkmated.com adapter

---
 .../adapters/adapter_checkmatedcom.py | 238 ++++++++++++++++++
 1 file changed, 238 insertions(+)
 create mode 100644 fanficdownloader/adapters/adapter_checkmatedcom.py

diff --git a/fanficdownloader/adapters/adapter_checkmatedcom.py b/fanficdownloader/adapters/adapter_checkmatedcom.py
new file mode 100644
index 00000000..40dda928
--- /dev/null
+++ b/fanficdownloader/adapters/adapter_checkmatedcom.py
@@ -0,0 +1,238 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+import logging
+import re
+import urllib2
+
+from .. import BeautifulSoup as bs
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, makeDate
+
+
+def getClass():
+    return CheckmatedComAdapter
+
+
+class CheckmatedComAdapter(BaseSiteAdapter):
+
+    def __init__(self, config, url):
+        BaseSiteAdapter.__init__(self, config, url)
+
+        self.decode = ["Windows-1252",
+                       "utf8"] # 1252 is a superset of iso-8859-1.
+                               # Most sites that claim to be
+                               # iso-8859-1 (and some that claim to be
+                               # utf8) are really windows-1252.
+        self.username = "" # if left empty, site doesn't return any message at all.
+        self.password = ""
+        self.is_adult=False
+
+        # get storyId from url--url validation guarantees query is only sid=1234
+        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+        logging.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
+
+        self._setURL('http://' + self.getSiteDomain() + '/story.php?story='+self.story.getMetadata('storyId'))
+
+        # Each adapter needs to have a unique site abbreviation.
+        self.story.setMetadata('siteabbrev','chm')
+
+        # If all stories from the site fall into the same category,
+        # the site itself isn't likely to label them as such, so we
+        # do.
+        self.story.addToList("category","Harry Potter")
+
+        # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.checkmated.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/story.php?story=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/story.php?story=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. This story is in The Bedchamber + def needToLoginCheck(self, data): + if 'This story is in The Bedchamber' in data \ + or 'That username is not in our database' in data \ + or "That password is not correct, please try again" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['name'] = self.username + params['pass'] = self.password + else: + params['name'] = self.getConfig("username") + params['pass'] = self.getConfig("password") + params['login'] = 'yes' + params['submit'] = 'login' + + loginUrl = 'http://' + self.getSiteDomain()+'/login.php' + d = self._fetchUrl(loginUrl,params) + e = self._fetchUrl(url) + + if "Welcome back," not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['name'])) + raise exceptions.FailedToLogin(url,params['name']) + return False + elif "This story is in The Bedchamber" in e: + logging.info("Your account does not have sufficient priviliges to read this story.") + return False + else: + return True + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.findAll('span', {'class' : 'storytitle'}) + self.story.setMetadata('title',a[0].string) + + # Find authorid and URL from... author url. 
+ a = a[1].find('a', href=re.compile(r"authors.php\?name\=\w+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + a = soup.find('select', {'name' : 'chapter'}) + if a == None: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in a.findAll('option'): + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/story.php?story='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # website does not keep track of word count, and there is no convenient way to calculate it + + summary = soup.find('fieldset') + summary.find('legend').extract() + self.setDescription(url,summary) + + + # Rated: NC-17
+        table = soup.findAll('div', {'class' : 'text'})[1]
+        for labels in table.findAll('tr'):
+            value = labels.findAll('td')[1]
+            label = labels.findAll('td')[0]
+
+            if 'Rating' in stripHTML(label):
+                self.story.setMetadata('rating', stripHTML(value))
+
+            if 'Ship' in stripHTML(label):
+                for char in value.string.split('/'):
+                    if char != 'none':
+                        self.story.addToList('characters',char)
+
+            if 'Status' in stripHTML(label):
+                if value.find('img', {'src' : 'img/incomplete.gif'}) == None:
+                    self.story.setMetadata('status', 'Completed')
+                else:
+                    self.story.setMetadata('status', 'In-Progress')
+
+            if 'Published' in stripHTML(label):
+                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
+
+            if 'Updated' in stripHTML(label):
+                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+        a = self._fetchUrl(self.story.getMetadata('authorUrl')+'&cat=stories')
+        for story in bs.BeautifulSoup(a).findAll('table', {'class' : 'storyinfo'}):
+            a = story.find('a', href=re.compile(r"review.php\?s\="+self.story.getMetadata('storyId')+'&act=view'))
+            if a != None:
+                for labels in story.findAll('tr'):
+                    value = labels.findAll('td')[1]
+                    label = labels.findAll('td')[0]
+                    if 'genre' in stripHTML(label):
+                        for genre in value.findAll('img'):
+                            self.story.addToList('genre',genre['title'])
+
+
+    # grab the text for an individual chapter.
+    def getChapterText(self, url):
+
+        logging.debug('Getting chapter text from: %s' % url)
+
+        soup = bs.BeautifulSoup(self._fetchUrl(url),
+                                selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
+
+        div = soup.find('div', {'id' : 'resizeableText'})
+
+        if None == div:
+            raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+        # strip the per-chapter tool links before returning the text.
+        div.find('div', {'class' : 'storyTools'}).extract()
+
+        return self.utf8FromSoup(url,div)

From a67e3a7234abc4966f4f5f7f4be2b5dc6edb842d Mon Sep 17 00:00:00 2001
From: Ida
Date: Fri, 18 May 2012 16:34:40 -0400
Subject: [PATCH 466/482] Quick change for the summary collection in checkmated.com

---
 fanficdownloader/adapters/adapter_checkmatedcom.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fanficdownloader/adapters/adapter_checkmatedcom.py b/fanficdownloader/adapters/adapter_checkmatedcom.py
index 40dda928..3b6888a7 100644
--- a/fanficdownloader/adapters/adapter_checkmatedcom.py
+++ b/fanficdownloader/adapters/adapter_checkmatedcom.py
@@ -179,6 +179,7 @@ class CheckmatedComAdapter(BaseSiteAdapter):

         summary = soup.find('fieldset')
         summary.find('legend').extract()
+        summary.name='div'
         self.setDescription(url,summary)


From 86375876a45a2ed3f5735a11cb78e302ee68430f Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Fri, 18 May 2012 15:45:44 -0500
Subject: [PATCH 467/482] Integrate new adapter www.checkmated.com.
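Hooking a new adapter in is mostly the one-line import shown in
fanficdownloader/adapters/__init__.py below; every adapter_* module
exposes a getClass() hook the package uses to build its registry. A
rough sketch of that registry idea (not the literal implementation):

    import sys

    def collect_adapter_classes(package_name):
        # Every imported package_name.adapter_* module contributes one
        # adapter class via its getClass() hook.
        classes = []
        for name, module in sorted(sys.modules.items()):
            if name.startswith(package_name + '.adapter_') and module:
                classes.append(module.getClass())
        return classes

    adapter_classes = collect_adapter_classes('fanficdownloader.adapters')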
--- app.yaml | 2 +- calibre-plugin/__init__.py | 2 +- defaults.ini | 8 ++++++++ fanficdownloader/adapters/__init__.py | 1 + index.html | 20 ++++++++------------ plugin-defaults.ini | 8 ++++++++ 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/app.yaml b/app.yaml index a0db5945..49b3efd4 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-9 +version: 4-4-10 runtime: python27 api_version: 1 threadsafe: true diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index a9b76e38..cf2778b2 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 21) + version = (1, 5, 22) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/defaults.ini b/defaults.ini index eae7928c..c38fc118 100644 --- a/defaults.ini +++ b/defaults.ini @@ -382,6 +382,14 @@ extratags: FanFiction,Testing,HTML ## personal.ini, not defaults.ini. #is_adult:true +[www.checkmated.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + [www.fanfiction.net] [www.ficbook.net] diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index 741e3beb..c534b27f 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -55,6 +55,7 @@ import adapter_archiveskyehawkecom import adapter_squidgeorgpeja import adapter_libraryofmoriacom import adapter_wraithbaitcom +import adapter_checkmatedcom ## This bit of complexity allows adapters to be added by just adding ## importing. It eliminates the long if/else clauses we used to need diff --git a/index.html b/index.html index e58ee91f..92fb4db8 100644 --- a/index.html +++ b/index.html @@ -54,18 +54,9 @@ much easier.

-      New Sites
+      New Site

-      We now support www.wraithbait.com and
-      www.libraryofmoria.com, thanks to Ida for adding these!
-
-      Support for the Wonderful World of MakeBelieve(WWOMB)
-      archive at
-      http://www.squidge.org/peja/cgi-bin/index.php
-      has also been added. This does not support other sections of
-      www.squidge.org, or the other files under www.squidge.org/peja that
-      aren't in the eFiction instance.
+      We now support www.checkmated.com, thanks to Ida for adding this!

      Questions? Check out our
@@ -75,7 +66,7 @@
 If you have any problems with this application, please report
 them in the FanFictionDownLoader Google Group. The
-      Previous Version is also available for you to use if necessary.
+      Previous Version is also available for you to use if necessary.

      {{ error_message }}
@@ -316,6 +307,11 @@
      http://www.squidge.org/peja/cgi-bin/viewstory.php?sid=1234.
      This is only for squidge.org/peja, not other parts of squidge.org.
+
+      www.checkmated.com
+
+      Use the URL of the story's first chapter, such as
+      http://www.checkmated.com/story.php?story=10898.
+

      A few additional things to know, which will make your life substantially easier: diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 69ae1cf3..cab7f58d 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -368,6 +368,14 @@ extratags: FanFiction,Testing,HTML ## personal.ini, not defaults.ini. #is_adult:true +[www.checkmated.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + [www.fanfiction.net] [www.ficbook.net] From f7e276c29c8c63d7b35ceee4038ee51e68a3a666 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 18 May 2012 15:46:09 -0500 Subject: [PATCH 468/482] Added tag calibre-plugin-1.5.22 for changeset 24c3362b0f89 From 6040d69bf10e2323530f556b3ff585e3dfa95129 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 18 May 2012 15:46:32 -0500 Subject: [PATCH 469/482] Added tag FanFictionDownLoader-4.4.10 for changeset 24c3362b0f89 From f680d997cefddaa3d8897515665554bfe47ff085 Mon Sep 17 00:00:00 2001 From: Ida Date: Sat, 19 May 2012 20:56:43 -0400 Subject: [PATCH 470/482] First version of adapter for sycophanthex.com, phoenixsong.net, dramione.org, and walkingtheplank.org Plus a quick correction to checkmated.com --- .../adapter_ashwindersycophanthexcom.py | 257 +++++++++++++++++ .../adapters/adapter_chaossycophanthexcom.py | 239 +++++++++++++++ .../adapters/adapter_checkmatedcom.py | 2 +- .../adapters/adapter_dramioneorg.py | 271 ++++++++++++++++++ .../adapter_erosnsapphosycophanthexcom.py | 255 ++++++++++++++++ .../adapters/adapter_lumossycophanthexcom.py | 239 +++++++++++++++ .../adapter_occlumencysycophanthexcom.py | 265 +++++++++++++++++ .../adapters/adapter_phoenixsongnet.py | 242 ++++++++++++++++ .../adapters/adapter_walkingtheplankorg.py | 233 +++++++++++++++ 9 files changed, 2002 insertions(+), 1 deletion(-) create mode 100644 fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py create mode 100644 fanficdownloader/adapters/adapter_chaossycophanthexcom.py create mode 100644 fanficdownloader/adapters/adapter_dramioneorg.py create mode 100644 fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py create mode 100644 fanficdownloader/adapters/adapter_lumossycophanthexcom.py create mode 100644 fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py create mode 100644 fanficdownloader/adapters/adapter_phoenixsongnet.py create mode 100644 fanficdownloader/adapters/adapter_walkingtheplankorg.py diff --git a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py new file mode 100644 index 00000000..31b71378 --- /dev/null +++ b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py @@ -0,0 +1,257 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return AshwinderSycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class AshwinderSycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','asph') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'ashwinder.sycophanthex.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'This story contains adult content and/or themes.' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['rememberme'] = '1' + params['sid'] = '' + params['intent'] = '' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "User Account Page" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. 
+ url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + try: + # in case link points somewhere other than the first chapter + a = soup.findAll('option')[1]['value'] + self.story.setMetadata('storyId',a.split('=',)[1]) + url = 'http://'+self.host+'/'+a + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except: + pass + + for info in asoup.findAll('table', {'bordercolor' : '#1A1919'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',a.string) + break + + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) + if len(chapters) == 0: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters: + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d): + try: + return d.name + except: + return "" + + cats = info.findAll('a',href=re.compile('categories.php')) + for cat in cats: + self.story.addToList('category',cat.string) + + + a = info.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId'))) + val = a.nextSibling + svalue = "" + while not defaultGetattr(val) == 'br': + val = val.nextSibling + val = val.nextSibling + while not defaultGetattr(val) == 'table': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + # Rated: NC-17
      etc + labels = info.findAll('b') + for labelspan in labels: + value = labelspan.nextSibling + label = stripHTML(labelspan) + + if 'Rating' in label: + self.story.setMetadata('rating', value) + + if 'Word Count' in label: + self.story.setMetadata('numWords', value) + + if 'Genres' in label: + genres = value.string.split(', ') + for genre in genres: + if genre != 'none': + self.story.addToList('genre',genre) + + if 'Warnings' in label: + warnings = value.string.split(', ') + for warning in warnings: + if warning != ' none': + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + + soup = bs.BeautifulSoup(data, selfClosingTags=('br','hr','span','center')) # some chapters seem to be hanging up on those tags, so it is safer to close them + + story = soup.find('div', {"align" : "left"}) + + if None == story: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,story) diff --git a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py new file mode 100644 index 00000000..a9c81f2c --- /dev/null +++ b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ChaosSycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ChaosSycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. 
+ self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','csph') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + #self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'chaos.sycophanthex.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=19" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + rating=pt.text.split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # Rated: NC-17
      etc + + labels = soup.findAll('span',{'class':'label'}) + + value = labels[0].previousSibling + svalue = "" + while value != None: + val = value + value = value.previousSibling + while not defaultGetattr(val,'class') == 'label': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Word count' in label: + self.story.setMetadata('numWords', value.split(' -')[0]) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Complete' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_checkmatedcom.py b/fanficdownloader/adapters/adapter_checkmatedcom.py index 3b6888a7..874e3f6c 100644 --- a/fanficdownloader/adapters/adapter_checkmatedcom.py +++ b/fanficdownloader/adapters/adapter_checkmatedcom.py @@ -105,7 +105,7 @@ class CheckmatedComAdapter(BaseSiteAdapter): raise exceptions.FailedToLogin(url,params['name']) return False elif "This story is in The Bedchamber" in e: - logging.info("Your account does not have sufficient priviliges to read this story.") + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Your account does not have sufficient priviliges to read this story.") return False else: return True diff --git a/fanficdownloader/adapters/adapter_dramioneorg.py b/fanficdownloader/adapters/adapter_dramioneorg.py new file mode 100644 index 00000000..3c869b21 --- /dev/null +++ b/fanficdownloader/adapters/adapter_dramioneorg.py @@ -0,0 +1,271 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return DramioneOrgAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class DramioneOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','drmn') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%dth %B %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. 
+ return 'dramione.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Registered Users Only' in data \ + or 'There is no such account on our website' in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['cookiecheck'] = '1' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Member Account" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&warning=5" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + if "Stories that are suitable for ages 16 and older" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formating, so it's a little ugly. + + genres=soup.findAll('a', {'class' : "tag-1"}) + for genre in genres: + self.story.addToList('genre',genre.string) + + warnings=soup.findAll('a', {'class' : "tag-2"}) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + # Rated: NC-17
      etc + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + value=value.replace('st','th').replace('nd','th').replace('rd','th') + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + value=value.replace('st','th').replace('nd','th').replace('rd','th') + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py new file mode 100644 index 00000000..3a0e72b0 --- /dev/null +++ b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py @@ -0,0 +1,255 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. 
import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ErosnSapphoSycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','essph') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%d/%m/%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'erosnsappho.sycophanthex.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=18" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data) + if m != None: + if self.is_adult or self.getConfig("is_adult"): + # We tried the default and still got a warning, so + # let's pull the warning number from the 'continue' + # link and reload data. + addurl = m.group(1) + # correct stupid &amp; error in url. + addurl = addurl.replace("&amp;","&") + url = self.url+'&index=1'+addurl + logging.debug("URL 2nd try: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + else: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site."
in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + rating=pt.text.split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formatting, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # Rated: NC-17
      etc + + labels = soup.findAll('span',{'class':'label'}) + + value = labels[0].previousSibling + svalue = "" + while value != None: + val = value + value = value.previousSibling + while not defaultGetattr(val,'class') == 'label': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Word count' in label: + self.story.setMetadata('numWords', value.split(' -')[0]) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Complete' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py new file mode 100644 index 00000000..cd43de0c --- /dev/null +++ b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return ChaosSycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class ChaosSycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','lsph') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'lumos.sycophanthex.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&ageconsent=ok&warning=19" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + if "Age Consent Required" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. 
This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + pt = soup.find('div', {'id' : 'pagetitle'}) + a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + rating=pt.text.split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formatting, so it's a little ugly. + + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + + # Rated: NC-17
      etc + + labels = soup.findAll('span',{'class':'label'}) + + value = labels[0].previousSibling + svalue = "" + while value != None: + val = value + value = value.previousSibling + while not defaultGetattr(val,'class') == 'label': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Word count' in label: + self.story.setMetadata('numWords', value.split(' -')[0]) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + for cat in cats: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + for char in chars: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Complete' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div) diff --git a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py new file mode 100644 index 00000000..ce07e53b --- /dev/null +++ b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return OcclumencySycophantHexComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class OcclumencySycophantHexComAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','osph') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%m/%d/%Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'occlumency.sycophanthex.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'This story contains adult content and/or themes.' 
in data \ + or "That password doesn't match the one in our database" in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['penname'] = self.username + params['password'] = self.password + else: + params['penname'] = self.getConfig("username") + params['password'] = self.getConfig("password") + params['rememberme'] = '1' + params['sid'] = '' + params['intent'] = '' + params['submit'] = 'Submit' + + loginUrl = 'http://' + self.getSiteDomain() + '/user.php' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['penname'])) + + d = self._fetchUrl(loginUrl, params) + + if "Logout" not in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['penname'])) + raise exceptions.FailedToLogin(url,params['penname']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + try: + # in case link points somewhere other than the first chapter + a = soup.findAll('option')[1]['value'] + self.story.setMetadata('storyId',a.split('=',)[1]) + url = 'http://'+self.host+'/'+a + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except: + pass + + for info in asoup.findAll('table', {'class' : 'border'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',a.string) + break + + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) + if len(chapters) == 0: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters: + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formatting, so it's a little ugly.
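+        # Note: unlike the two-argument defaultGetattr(d,k) helper used in the
+        # other sycophanthex adapters (which reads a tag attribute), the variant
+        # below returns the tag's *name* (e.g. 'br', 'table'), so the sibling
+        # walks that follow it can stop at layout boundaries.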
+ + # utility method + def defaultGetattr(d): + try: + return d.name + except: + return "" + + cats = info.findAll('a',href=re.compile('categories.php')) + for cat in cats: + self.story.addToList('category',cat.string) + + + a = info.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId'))) + val = a.nextSibling + svalue = "" + while not defaultGetattr(val) == 'br': + val = val.nextSibling + val = val.nextSibling + while not defaultGetattr(val) == 'table': + svalue += str(val) + val = val.nextSibling + self.setDescription(url,svalue) + + # Rated: NC-17
      etc + labels = info.findAll('b') + for labelspan in labels: + value = labelspan.nextSibling + label = stripHTML(labelspan) + + if 'Rating' in label: + self.story.setMetadata('rating', value) + + if 'Word Count' in label: + self.story.setMetadata('numWords', value) + + if 'Genres' in label: + genres = value.string.split(', ') + for genre in genres: + if genre != 'none': + self.story.addToList('genre',genre) + + if 'Characters' in label: + chars = value.string.split(', ') + for char in chars: + if char != 'none': + self.story.addToList('characters',char) + + if 'Warnings' in label: + warnings = value.string.split(', ') + for warning in warnings: + if warning != ' none': + self.story.addToList('warnings',warning) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + data = self._fetchUrl(url) + data = data.replace('

      ') + + soup = bs.BeautifulSoup(data, selfClosingTags=('br','hr')) + + story = soup.find('div', {"align" : "left"}) + + if None == story: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,story) diff --git a/fanficdownloader/adapters/adapter_phoenixsongnet.py b/fanficdownloader/adapters/adapter_phoenixsongnet.py new file mode 100644 index 00000000..928c6f5b --- /dev/null +++ b/fanficdownloader/adapters/adapter_phoenixsongnet.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2, urllib, cookielib + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return PhoenixSongNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class PhoenixSongNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/') + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','phs') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%B %d %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.phoenixsong.net' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/fanfiction/story/1234/" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/fanfiction/story/")+r"\d+/?$" + + ## Login seems to be reasonably standard across eFiction sites. + def needToLoginCheck(self, data): + if 'Please login to continue.' 
in data: + return True + else: + return False + + def performLogin(self, url): + params = {} + + if self.password: + params['txtusername'] = self.username + params['txtpassword'] = self.password + else: + params['txtusername'] = self.getConfig("username") + params['txtpassword'] = self.getConfig("password") + #params['remember'] = '1' + params['login'] = 'Login' + + loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php' + logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, + params['txtusername'])) + d = self._fetchUrl(loginUrl, params) + + if 'Please login to continue.' in d : #Member Account + logging.info("Failed to login to URL %s as %s" % (loginUrl, + params['txtusername'])) + raise exceptions.FailedToLogin(url,params['txtusername']) + return False + else: + return True + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if self.needToLoginCheck(data): + # need to log in for this one. + self.performLogin(url) + data = self._fetchUrl(url) + + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + b = soup.find('div', {'id' : 'nav25'}) + a = b.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. 
/fanfiction/stories.php?psid=125 + a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + chapters = soup.find('select') + if chapters == None: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + for b in soup.findAll('b'): + if b.text == "Updated": + date = b.nextSibling.string.split(': ')[1].split(',') + self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) + self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat)) + else: + i = 0 + chapters = chapters.findAll('option') + for chapter in chapters: + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['value'])) + if i == 0: + self.story.setMetadata('storyId',chapter['value'].split('/')[3]) + head = bs.BeautifulSoup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b') + for b in head: + if b.text == "Updated": + date = b.nextSibling.string.split(': ')[1].split(',') + self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) + + if i == (len(chapters)-1): + head = bs.BeautifulSoup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b') + for b in head: + if b.text == "Updated": + date = b.nextSibling.string.split(': ')[1].split(',') + self.story.setMetadata('dateUpdated', makeDate(date[0]+date[1], self.dateformat)) + i = i+1 + + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + info = asoup.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$")) + while info != None: + info = info.findNext('div') + b = info.find('b') + val = b.nextSibling + + if 'Rating' in b.string: + self.story.setMetadata('rating', val.string.split(': ')[1]) + + if 'Words' in b.string: + self.story.setMetadata('numWords', val.string.split(': ')[1]) + + if 'Setting' in b.string: + self.story.addToList('category', val.string.split(': ')[1]) + + if 'Status' in b.string: + if 'Completed' in val: + val = 'Completed' + else: + val = 'In-Progress' + self.story.setMetadata('status', val) + + if 'Summary' in b.string: + b.extract() + info.find('br').extract() + self.setDescription(url,info) + break + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + chapter=bs.BeautifulSoup('
      ') + for p in soup.findAll('p'): + if "This is for problems with the formatting or the layout of the chapter." in stripHTML(p): + break + chapter.append(p) + + for a in chapter.findAll('div'): + a.extract() + for a in chapter.findAll('table'): + a.extract() + for a in chapter.findAll('script'): + a.extract() + for a in chapter.findAll('form'): + a.extract() + for a in chapter.findAll('textarea'): + a.extract() + + + if None == chapter: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,chapter) diff --git a/fanficdownloader/adapters/adapter_walkingtheplankorg.py b/fanficdownloader/adapters/adapter_walkingtheplankorg.py new file mode 100644 index 00000000..4b8e5bc5 --- /dev/null +++ b/fanficdownloader/adapters/adapter_walkingtheplankorg.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return WalkingThePlankOrgAdapter + +class WalkingThePlankOrgAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/archive//viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','wtp') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%b %d, %Y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.walkingtheplank.org' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/archive/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/archive/viewstory.php?sid=")+r"\d+$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. 
+    def extractChapterUrlsAndMetadata(self): + + if self.is_adult or self.getConfig("is_adult"): + # Weirdly, different sites use different warning numbers. + # If the title search below fails, there's a good chance + # you need a different number. print data at that point + # and see what the 'click here to continue' url says. + addurl = "&warning=4" + else: + addurl="" + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url+'&index=1'+addurl + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + + # The actual text that is used to announce you need to be an + # adult varies from site to site. Again, print data before + # the title search to troubleshoot. + + if "By clicking this link, you acknowledge" in data: + raise exceptions.AdultCheckRequired(self.url) + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title + a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + self.story.setMetadata('title',a.string) + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + + # Find the chapters: + for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): + # just in case there's tags, like in chapter titles. + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/archive/'+chapter['href']+addurl)) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + # eFiction sites don't help us out a lot with their meta data + # formatting, so it's a little ugly.
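+        # In the label loop below, the Published/Updated branches feed
+        # stripHTML(value) and self.dateformat ("%b %d, %Y" for this site)
+        # into makeDate from base_adapter.  A minimal sketch of what such a
+        # helper is assumed to look like (not necessarily the actual
+        # base_adapter code):
+        #
+        #     from datetime import datetime
+        #
+        #     def makeDate(string, dateformat):
+        #         # e.g. makeDate("May 20, 2012", "%b %d, %Y")
+        #         #      -> datetime.datetime(2012, 5, 20, 0, 0)
+        #         return datetime.strptime(string, dateformat)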
+ + # utility method + def defaultGetattr(d,k): + try: + return d[k] + except: + return "" + + labels = soup.findAll('span',{'class':'label'}) + for labelspan in labels: + value = labelspan.nextSibling + label = labelspan.string + + if 'Summary' in label: + ## Everything until the next span class='label' + svalue = "" + while not defaultGetattr(value,'class') == 'label': + svalue += str(value) + value = value.nextSibling + self.setDescription(url,svalue) + #self.story.setMetadata('description',stripHTML(svalue)) + + if 'Rated' in label: + self.story.setMetadata('rating', value) + + if 'Word count' in label: + self.story.setMetadata('numWords', value) + + if 'Categories' in label: + cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) + catstext = [cat.string for cat in cats] + for cat in catstext: + self.story.addToList('category',cat.string) + + if 'Characters' in label: + chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters')) + charstext = [char.string for char in chars] + for char in charstext: + self.story.addToList('characters',char.string) + + if 'Genre' in label: + genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) + for genre in genres: + self.story.addToList('genre',genre.string) + + if 'Warnings' in label: + warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX + for warning in warnings: + self.story.addToList('warnings',warning.string) + + if 'Completed' in label: + if 'Yes' in value: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + try: + # Find Series name from series URL. + a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) + series_name = a.string + series_url = 'http://'+self.host+'/archive/'+a['href'] + + # use BeautifulSoup HTML parser to make everything easier to find. + seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url)) + storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) + i=1 + for a in storyas: + if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')): + self.setSeries(series_name, i) + break + i+=1 + + except: + # I find it hard to care if the series parsing fails + pass + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'story'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" 
% url) + + return self.utf8FromSoup(url,div) From 2d8c13728d7a1ab66766a4165019c32b7ef77ddf Mon Sep 17 00:00:00 2001 From: Ida Date: Sun, 20 May 2012 10:57:39 -0400 Subject: [PATCH 471/482] Changed the way title is processed for sycophanthex.com --- fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py | 2 +- fanficdownloader/adapters/adapter_chaossycophanthexcom.py | 2 +- fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py | 2 +- fanficdownloader/adapters/adapter_lumossycophanthexcom.py | 2 +- fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py index 31b71378..dc4ef003 100644 --- a/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_ashwindersycophanthexcom.py @@ -160,7 +160,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter): for info in asoup.findAll('table', {'bordercolor' : '#1A1919'}): a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) if a != None: - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',a.text) break diff --git a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py index a9c81f2c..b6cf945b 100644 --- a/fanficdownloader/adapters/adapter_chaossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_chaossycophanthexcom.py @@ -119,7 +119,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',a.text) # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py index 3a0e72b0..1f5567c9 100644 --- a/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_erosnsapphosycophanthexcom.py @@ -135,7 +135,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',a.text) # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py index cd43de0c..e011b2e9 100644 --- a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py @@ -119,7 +119,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter): ## Title pt = soup.find('div', {'id' : 'pagetitle'}) a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',a.text) # Find authorid and URL from... author url. 
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) diff --git a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py index ce07e53b..c31a674c 100644 --- a/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_occlumencysycophanthexcom.py @@ -161,7 +161,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter): for info in asoup.findAll('table', {'class' : 'border'}): a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) if a != None: - self.story.setMetadata('title',a.string) + self.story.setMetadata('title',a.text) break From bc233d527a57e5a206ed40536149b07ef3b3daa7 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 20 May 2012 10:42:32 -0500 Subject: [PATCH 472/482] Integrate 8 new sites from Ida, plus a couple minor fixes. --- app.yaml | 2 +- calibre-plugin/__init__.py | 2 +- defaults.ini | 80 +++++++++++++++++-- fanficdownloader/adapters/__init__.py | 8 ++ .../adapters/adapter_lumossycophanthexcom.py | 4 +- .../adapters/adapter_walkingtheplankorg.py | 2 +- fanficdownloader/exceptions.py | 1 + index.html | 57 ++++++++++++- plugin-defaults.ini | 80 +++++++++++++++++-- 9 files changed, 214 insertions(+), 22 deletions(-) diff --git a/app.yaml b/app.yaml index 49b3efd4..5ffbce47 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-10 +version: 4-4-11 runtime: python27 api_version: 1 threadsafe: true diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index cf2778b2..8d7c4f71 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 22) + version = (1, 5, 23) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/defaults.ini b/defaults.ini index c38fc118..33a621ba 100644 --- a/defaults.ini +++ b/defaults.ini @@ -306,6 +306,16 @@ extratags: FanFiction,Testing,Text [test1.com:html] extratags: FanFiction,Testing,HTML +[archive.skyehawke.com] + +[ashwinder.sycophanthex.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + [castlefans.org] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -319,6 +329,39 @@ extratags: FanFiction,Testing,HTML ## personal.ini, not defaults.ini. #is_adult:true +[chaos.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/images/.*?ribbon.gif + +[dramione.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. 
+#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +[erosnsappho.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/images/.*?ribbon.gif + [fanfiction.mugglenet.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -346,16 +389,25 @@ extratags: FanFiction,Testing,HTML #username:YourName #password:yourpassword +[lumos.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + [nfacommunity.com] ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. #is_adult:true -## Some sites also require the user to confirm they are adult for -## adult content. In commandline version, this should go in your -## personal.ini, not defaults.ini. -#is_adult:true +[occlumency.sycophanthex.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword [thequidditchpitch.org] ## Some sites require login (or login for some rated stories) The @@ -377,9 +429,9 @@ extratags: FanFiction,Testing,HTML #is_adult:true [www.archiveofourown.org] -## Some sites also require the user to confirm they are adult for -## adult content. In commandline version, this should go in your -## personal.ini, not defaults.ini. +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. #is_adult:true [www.checkmated.com] @@ -456,6 +508,14 @@ extratags: ## cover image. This lets you exclude them. cover_exclusion_regexp:/stories/999/images/.*?_trophy.png +[www.phoenixsong.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + [www.potionsandsnitches.net] [www.siye.co.uk] @@ -522,6 +582,12 @@ collect_series: false ## twiwrite.net (ab)uses series as personal reading lists. collect_series: false +[www.walkingtheplank.org] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. 
+#is_adult:true + [www.whofic.com] [www.wraithbait.com] diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index c534b27f..871a0b08 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -56,6 +56,14 @@ import adapter_squidgeorgpeja import adapter_libraryofmoriacom import adapter_wraithbaitcom import adapter_checkmatedcom +import adapter_chaossycophanthexcom +import adapter_dramioneorg +import adapter_erosnsapphosycophanthexcom +import adapter_lumossycophanthexcom +import adapter_occlumencysycophanthexcom +import adapter_phoenixsongnet +import adapter_walkingtheplankorg +import adapter_ashwindersycophanthexcom ## This bit of complexity allows adapters to be added by just adding ## importing. It eliminates the long if/else clauses we used to need diff --git a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py index e011b2e9..aa995f75 100644 --- a/fanficdownloader/adapters/adapter_lumossycophanthexcom.py +++ b/fanficdownloader/adapters/adapter_lumossycophanthexcom.py @@ -27,11 +27,11 @@ from .. import exceptions as exceptions from base_adapter import BaseSiteAdapter, makeDate def getClass(): - return ChaosSycophantHexComAdapter + return LumosSycophantHexComAdapter # Class name has to be unique. Our convention is camel case the # sitename with Adapter at the end. www is skipped. -class ChaosSycophantHexComAdapter(BaseSiteAdapter): +class LumosSycophantHexComAdapter(BaseSiteAdapter): def __init__(self, config, url): BaseSiteAdapter.__init__(self, config, url) diff --git a/fanficdownloader/adapters/adapter_walkingtheplankorg.py b/fanficdownloader/adapters/adapter_walkingtheplankorg.py index 4b8e5bc5..60784052 100644 --- a/fanficdownloader/adapters/adapter_walkingtheplankorg.py +++ b/fanficdownloader/adapters/adapter_walkingtheplankorg.py @@ -48,7 +48,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter): logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/archive//viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('http://' + self.getSiteDomain() + '/archive/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','wtp') diff --git a/fanficdownloader/exceptions.py b/fanficdownloader/exceptions.py index 8314c26f..525c64e0 100644 --- a/fanficdownloader/exceptions.py +++ b/fanficdownloader/exceptions.py @@ -63,6 +63,7 @@ class UnknownSite(Exception): def __init__(self,url,supported_sites_list): self.url=url self.supported_sites_list=supported_sites_list + self.supported_sites_list.sort() def __str__(self): return "Unknown Site(%s). Supported sites: (%s)" % (self.url, ", ".join(self.supported_sites_list)) diff --git a/index.html b/index.html index 92fb4db8..3f4bf5fc 100644 --- a/index.html +++ b/index.html @@ -54,9 +54,19 @@ much easier.

      -

      New Site

      +

      More New Sites

      - We now support www.checkmated.com, thanks to Ida for adding this! + Ida's been busy! We now have support for 8 new sites: +

        +
      • ashwinder.sycophanthex.com
      • +
      • chaos.sycophanthex.com
      • +
      • erosnsappho.sycophanthex.com
      • +
      • lumos.sycophanthex.com
      • +
      • occlumency.sycophanthex.com
      • +
      • dramione.org
      • +
      • www.phoenixsong.net
      • +
      • www.walkingtheplank.org
      • +

      Questions? Check out our @@ -66,7 +76,7 @@ If you have any problems with this application, please report them in the FanFictionDownLoader Google Group. The - Previous Version is also available for you to use if necessary. + Previous Version is also available for you to use if necessary.

      {{ error_message }} @@ -312,6 +322,47 @@ Use the URL of the story's first chapter, such as
      http://www.checkmated.com/story.php?story=10898. + +
      ashwinder.sycophanthex.com
      +
      + Use the URL of the story's chapter list, such as +
      http://ashwinder.sycophanthex.com/viewstory.php?sid=1234 +
      +
      chaos.sycophanthex.com
      +
      + Use the URL of the story's chapter list, such as +
      http://chaos.sycophanthex.com/viewstory.php?sid=1234 +
      +
      erosnsappho.sycophanthex.com
      +
      + Use the URL of the story's chapter list, such as +
      http://erosnsappho.sycophanthex.com/viewstory.php?sid=1234 +
      +
      lumos.sycophanthex.com
      +
      + Use the URL of the story's chapter list, such as +
      http://lumos.sycophanthex.com/viewstory.php?sid=1234 +
      +
      occlumency.sycophanthex.com
      +
      + Use the URL of the story's chapter list, such as +
      http://occlumency.sycophanthex.com/viewstory.php?sid=1234 +
      +
      dramione.org
      +
      + Use the URL of the story's chapter list, such as +
      http://dramione.org/viewstory.php?sid=1234 +
      +
      www.phoenixsong.net
      +
      + Use the URL of any story chapter, such as +
      http://www.phoenixsong.net/fanfiction/story/1234/ +
      +
      www.walkingtheplank.org
      +
      + Use the URL of the story's first chapter, such as +
      http://www.walkingtheplank.org/archive/viewstory.php?sid=1234 +
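      A quick aside on how these URLs are handled: most of the adapters above reduce whatever story URL you paste to the canonical viewstory.php?sid=N form. A minimal sketch of that normalization step, with a hypothetical helper name (the real logic lives in each adapter's __init__):

          import urlparse  # Python 2, matching this codebase

          def normalize_story_url(url, domain):
              # URL validation upstream guarantees the query is exactly sid=1234,
              # so splitting on '=' is safe here.
              sid = urlparse.urlparse(url).query.split('=')[1]
              return 'http://%s/viewstory.php?sid=%s' % (domain, sid)

          print(normalize_story_url('http://lumos.sycophanthex.com/viewstory.php?sid=1234',
                                    'lumos.sycophanthex.com'))
          # -> http://lumos.sycophanthex.com/viewstory.php?sid=1234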

      A few additional things to know, which will make your life substantially easier: diff --git a/plugin-defaults.ini b/plugin-defaults.ini index cab7f58d..9788dca3 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -292,6 +292,16 @@ extratags: FanFiction,Testing,Text [test1.com:html] extratags: FanFiction,Testing,HTML +[archive.skyehawke.com] + +[ashwinder.sycophanthex.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + [castlefans.org] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -305,6 +315,39 @@ extratags: FanFiction,Testing,HTML ## personal.ini, not defaults.ini. #is_adult:true +[chaos.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/images/.*?ribbon.gif + +[dramione.org] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + +## Some sites also require the user to confirm they are adult for +## adult content. In commandline version, this should go in your +## personal.ini, not defaults.ini. +#is_adult:true + +[erosnsappho.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/images/.*?ribbon.gif + [fanfiction.mugglenet.com] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In @@ -332,16 +375,25 @@ extratags: FanFiction,Testing,HTML #username:YourName #password:yourpassword +[lumos.sycophanthex.com] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + [nfacommunity.com] ## Some sites do not require a login, but do require the user to ## confirm they are adult for adult content. In commandline version, ## this should go in your personal.ini, not defaults.ini. #is_adult:true -## Some sites also require the user to confirm they are adult for -## adult content. In commandline version, this should go in your -## personal.ini, not defaults.ini. -#is_adult:true +[occlumency.sycophanthex.com] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. 
+#username:YourName +#password:yourpassword [thequidditchpitch.org] ## Some sites require login (or login for some rated stories) The @@ -363,9 +415,9 @@ extratags: FanFiction,Testing,HTML #is_adult:true [www.archiveofourown.org] -## Some sites also require the user to confirm they are adult for -## adult content. In commandline version, this should go in your -## personal.ini, not defaults.ini. +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. #is_adult:true [www.checkmated.com] @@ -439,6 +491,14 @@ extratags: ## cover image. This lets you exclude them. cover_exclusion_regexp:/stories/999/images/.*?_trophy.png +[www.phoenixsong.net] +## Some sites require login (or login for some rated stories) The +## program can prompt you, or you can save it in config. In +## commandline version, this should go in your personal.ini, not +## defaults.ini. +#username:YourName +#password:yourpassword + [www.potionsandsnitches.net] [www.siye.co.uk] @@ -505,6 +565,12 @@ collect_series: false ## twiwrite.net (ab)uses series as personal reading lists. collect_series: false +[www.walkingtheplank.org] +## Some sites do not require a login, but do require the user to +## confirm they are adult for adult content. In commandline version, +## this should go in your personal.ini, not defaults.ini. +#is_adult:true + [www.whofic.com] [www.wraithbait.com] From d5ff2c477a37b76fbecce7ece23fa17160397b7c Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 20 May 2012 10:42:47 -0500 Subject: [PATCH 473/482] Added tag calibre-plugin-1.5.23 for changeset a09543417484 From 5b5e8a3a14bd681a967f0bfa87c692cc95565b58 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 20 May 2012 10:43:12 -0500 Subject: [PATCH 474/482] Added tag FanFictionDownLoader-4.4.11 for changeset a09543417484 From 9402cd90764445d6cc6c85e8f09d0f991d04119e Mon Sep 17 00:00:00 2001 From: Ida Date: Fri, 25 May 2012 15:08:51 -0400 Subject: [PATCH 475/482] First version of the adapter for the thehexfiles.net --- .../adapters/adapter_thehexfilesnet.py | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 fanficdownloader/adapters/adapter_thehexfilesnet.py diff --git a/fanficdownloader/adapters/adapter_thehexfilesnet.py b/fanficdownloader/adapters/adapter_thehexfilesnet.py new file mode 100644 index 00000000..20123684 --- /dev/null +++ b/fanficdownloader/adapters/adapter_thehexfilesnet.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return TheHexFilesNetAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. 
www is skipped. +class TheHexFilesNetAdapter(BaseSiteAdapter): + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','thf') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","Harry Potter") + + # The date format will vary from site to site. + # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + self.dateformat = "%Y.%m.%d" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'thehexfiles.net' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/viewstory.php?sid=1234" + + def getSiteURLPattern(self): + return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + + # Find authorid and URL from... author url. + a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) + self.story.setMetadata('authorId',a['href'].split('=')[1]) + self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('author',a.string) + asoup = bs.BeautifulSoup(self._fetchUrl(self.story.getMetadata('authorUrl'))) + + try: + # in case link points somewhere other than the first chapter + a = soup.findAll('option')[1]['value'] + self.story.setMetadata('storyId',a.split('=',)[1]) + url = 'http://'+self.host+'/'+a + soup = bs.BeautifulSoup(self._fetchUrl(url)) + except: + pass + + for info in asoup.findAll('table', {'cellspacing' : '4'}): + a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$")) + if a != None: + self.story.setMetadata('title',a.string) + break + + + # Find the chapters: + chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$')) + if len(chapters) == 0: + self.chapterUrls.append((self.story.getMetadata('title'),url)) + else: + for chapter in chapters: + # just in case there's tags, like in chapter titles. 
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + + cats = info.findAll('a',href=re.compile('categories.php')) + for cat in cats: + self.story.addToList('category',cat.string) + + words = info.find(text=re.compile('Words:')).split('|')[1].split(': ')[1] + self.story.setMetadata('numWords', words) + + comp = info.find('span', {'class' : 'completed'}).string.split(': ')[1] + if 'Yes' in comp: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + summary = info.find('td', {'class' : 'summary'}) + self.setDescription(url,summary) + + rating=stripHTML(info.find('td', {'align' : 'left'})).split('(')[1].split(')')[0] + self.story.setMetadata('rating', rating) + + labels = info.findAll('td', {'width' : '10%'}) + values = info.findAll('td', {'width' : '40%'}) + for i in range(0,len(labels)): + value = stripHTML(values[i]) + label = stripHTML(labels[i]) + + if 'Genres' in label: + genres = value.split(', ') + for genre in genres: + if genre != 'none': + self.story.addToList('genre',genre) + + + if 'Warnings' in label: + warnings = value.split(', ') + for warning in warnings: + if warning != 'none': + self.story.addToList('warnings',warning) + + if 'Published' in label: + self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat)) + + if 'Updated' in label: + # there's a stray [ at the end. + #value = value[0:-1] + self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat)) + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + for a in soup.findAll('table'): + a.extract() + + if None == soup: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,soup) \ No newline at end of file From b337c9aa7906160a2eddcbdfc67fec50c18b5326 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 25 May 2012 17:03:46 -0500 Subject: [PATCH 476/482] Integrate new adapter for thehexfiles.net, new inject series plugin feature. --- app.yaml | 2 +- calibre-plugin/__init__.py | 2 +- calibre-plugin/config.py | 8 ++++++++ calibre-plugin/ffdl_plugin.py | 11 +++++++--- calibre-plugin/jobs.py | 5 +++++ defaults.ini | 2 ++ fanficdownloader/adapters/__init__.py | 1 + fanficdownloader/adapters/adapter_test1.py | 24 ++++++++++------------ fanficdownloader/adapters/base_adapter.py | 2 +- index.html | 21 ++++++++----------- plugin-defaults.ini | 2 ++ 11 files changed, 48 insertions(+), 32 deletions(-) diff --git a/app.yaml b/app.yaml index 5ffbce47..7f3f3ad6 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # ffd-retief-hrd fanfictiondownloader application: fanfictiondownloader -version: 4-4-11 +version: 4-4-12 runtime: python27 api_version: 1 threadsafe: true diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index 8d7c4f71..bdc4f3b9 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' 
supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 23) + version = (1, 5, 24) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 6f97fb3e..ad68bf80 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -48,6 +48,7 @@ all_prefs.defaults['deleteotherforms'] = False all_prefs.defaults['adddialogstaysontop'] = False all_prefs.defaults['includeimages'] = False all_prefs.defaults['lookforurlinhtml'] = False +all_prefs.defaults['injectseries'] = False all_prefs.defaults['send_lists'] = '' all_prefs.defaults['read_lists'] = '' @@ -75,6 +76,7 @@ copylist = ['personal.ini', 'adddialogstaysontop', 'includeimages', 'lookforurlinhtml', + 'injectseries', 'gcnewonly', 'gc_site_settings', 'allow_gc_from_ini'] @@ -181,6 +183,7 @@ class ConfigWidget(QWidget): prefs['adddialogstaysontop'] = self.basic_tab.adddialogstaysontop.isChecked() prefs['includeimages'] = self.basic_tab.includeimages.isChecked() prefs['lookforurlinhtml'] = self.basic_tab.lookforurlinhtml.isChecked() + prefs['injectseries'] = self.basic_tab.injectseries.isChecked() if self.readinglist_tab: # lists @@ -320,6 +323,11 @@ class BasicTab(QWidget): self.lookforurlinhtml.setChecked(prefs['lookforurlinhtml']) self.l.addWidget(self.lookforurlinhtml) + self.injectseries = QCheckBox("Inject calibre Series when none found?",self) + self.injectseries.setToolTip("If no series is found, inject the calibre series (if there is one) so it appears on the FFDL title page(not cover).") + self.injectseries.setChecked(prefs['injectseries']) + self.l.addWidget(self.injectseries) + self.l.insertStretch(-1) def set_collisions(self): diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index c142f0ad..1a72a6ec 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -572,7 +572,13 @@ make_firstimage_cover:true # this behavior matches how epubs come out when imported # dateCreated == packaged--epub/etc created. book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz) - + + if book_id != None and prefs['injectseries']: + mi = db.get_metadata(book_id,index_is_id=True) + if not book['series'] and mi.series != None: + book['calibre_series'] = (mi.series,mi.series_index) + print("calibre_series:%s [%s]"%book['calibre_series']) + if book['good']: # there shouldn't be any !'good' books at this point. # if still 'good', make a temp file to write the output to. 
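# (Hypothetical illustration, not part of the patch: with 'injectseries'
# enabled and a calibre book already shelved as series 'My Series' with
# index 3.0, the injectseries block above sets
# book['calibre_series'] = ('My Series', 3.0); do_download_for_worker() in
# jobs.py then feeds that to adapter.setSeries('My Series', 3.0) when the
# story site reported no series, and base_adapter renders it as
# "My Series [3]" when collect_series is on.)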
tmp = PersistentTemporaryFile(prefix='new-%s-'%book['calibre_id'], @@ -795,7 +801,7 @@ make_firstimage_cover:true coldef = custom_columns[col] if not meta.startswith('status-') and meta not in book['all_metadata'] or \ meta.startswith('status-') and 'status' not in book['all_metadata']: - print("No value for %s, skipping."%meta) + print("No value for %s, skipping custom column(%s) update."%(meta,coldef['name'])) continue if meta not in permitted_values[coldef['datatype']]: print("%s not a valid column type for %s, skipping."%(col,meta)) @@ -815,7 +821,6 @@ make_firstimage_cover:true db.commit() - print("book['added']:%s"%book['added']) if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']): gc_plugin = self.gui.iactions['Generate Cover'] setting_name = None diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 55c9853e..39156ba7 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -113,6 +113,11 @@ def do_download_for_worker(book,options): adapter.password = book['password'] story = adapter.getStoryMetadataOnly() + if 'calibre_series' in book: + print("calibre_series:%s [%d]"%book['calibre_series']) + adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) + else: + print("no calibre_series") writer = writers.getWriter(options['fileform'],adapter.config,adapter) outfile = book['outfile'] diff --git a/defaults.ini b/defaults.ini index 33a621ba..f863361a 100644 --- a/defaults.ini +++ b/defaults.ini @@ -409,6 +409,8 @@ cover_exclusion_regexp:/images/.*?ribbon.gif #username:YourName #password:yourpassword +[thehexfiles.net] + [thequidditchpitch.org] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py index 871a0b08..a879e2fa 100644 --- a/fanficdownloader/adapters/__init__.py +++ b/fanficdownloader/adapters/__init__.py @@ -64,6 +64,7 @@ import adapter_occlumencysycophanthexcom import adapter_phoenixsongnet import adapter_walkingtheplankorg import adapter_ashwindersycophanthexcom +import adapter_thehexfilesnet ## This bit of complexity allows adapters to be added by just adding ## importing. It eliminates the long if/else clauses we used to need diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index c6557370..c85a6733 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -90,17 +90,16 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" else: self.story.setMetadata('status','Completed') - langs = { - 0:"English", - 1:"Russian", - 2:"French", - 3:"German", - } + # greater than 10, no language or series. if idnum < 10: + langs = { + 0:"English", + 1:"Russian", + 2:"French", + 3:"German", + } self.story.setMetadata('language',langs[idnum%len(langs)]) - # greater than 10, no language. - - self.setSeries('The Great Test',idnum) + self.setSeries('The Great Test',idnum) self.story.setMetadata('rating','Tweenie') @@ -161,7 +160,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" text=u'''

      Prologue

      -

      This is a fake adapter for testing purposes. Different storyId's will give different errors:

      +

      This is a fake adapter for testing purposes. Different sids will give different errors:

      http://test1.com?sid=664 - Crazy string title

      http://test1.com?sid=665 - raises AdultCheckRequired

      http://test1.com?sid=666 - raises StoryDoesNotExist

      @@ -171,18 +170,17 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"

      http://test1.com?sid=670 - Succeeds, but sleeps 2sec on each chapter

      http://test1.com?sid=671 - Succeeds, but sleeps 2sec metadata only

      http://test1.com?sid=672 - Succeeds, quick meta, sleeps 2sec chapters only

      -

      Any other storyId will succeed with the same output.

      +

      Odd sids will be In-Progress, evens complete. sid<10 will be assigned one of four languages and included in a series.

      ''' else: text=u'''

      Chapter title from site

      -

      Centered text

      Lorem '''+self.crazystring+u''' italics, bold, underline consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

      br breaks

      Puella Magi Madoka Magica/魔法少女まどか★マギカ -
      +
      br breaks

      Don't—e;ver—d;o—that—a;gain, 法 é
      diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index f9f977cd..b1d41f3c 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -265,7 +265,7 @@ class BaseSiteAdapter(Configurable): # Just for series, in case we choose to change how it's stored or represented later. def setSeries(self,name,num): if self.getConfig('collect_series'): - self.story.setMetadata('series','%s [%s]'%(name, num)) + self.story.setMetadata('series','%s [%s]'%(name, int(num))) def setDescription(self,url,svalue): #print("\n\nsvalue:\n%s\n"%svalue) diff --git a/index.html b/index.html index 3f4bf5fc..41e9a9e8 100644 --- a/index.html +++ b/index.html @@ -54,19 +54,9 @@ much easier.

      -

      More New Sites

      +

      New Site

      - Ida's been busy! We now have support for 8 new sites: -

        -
      • ashwinder.sycophanthex.com
      • -
      • chaos.sycophanthex.com
      • -
      • erosnsappho.sycophanthex.com
      • -
      • lumos.sycophanthex.com
      • -
      • occlumency.sycophanthex.com
      • -
      • dramione.org
      • -
      • www.phoenixsong.net
      • -
      • www.walkingtheplank.org
      • -
      + Now supporting thehexfiles.net, which brings us up to an even 40 supported sites. Thanks again, Ida.

      Questions? Check out our @@ -76,7 +66,7 @@ If you have any problems with this application, please report them in the FanFictionDownLoader Google Group. The - Previous Version is also available for you to use if necessary. + Previous Version is also available for you to use if necessary.

      {{ error_message }} @@ -363,6 +353,11 @@ Use the URL of the story's first chapter, such as
      http://www.walkingtheplank.org/archive/viewstory.php?sid=1234 +
      thehexfiles.net
      +
      + Use the URL of the story's chapter list, such as +
      http://thehexfiles.net/viewstory.php?sid=1234 +
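      For reference, the new adapter accepts exactly this URL shape; its getSiteURLPattern() escapes the fixed prefix and then requires digits to the end of the string:

          import re

          pattern = re.escape("http://thehexfiles.net/viewstory.php?sid=") + r"\d+$"
          print(bool(re.match(pattern, "http://thehexfiles.net/viewstory.php?sid=1234")))     # True
          print(bool(re.match(pattern, "http://thehexfiles.net/viewstory.php?sid=1234&i=1"))) # False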

      A few additional things to know, which will make your life substantially easier: diff --git a/plugin-defaults.ini b/plugin-defaults.ini index 9788dca3..dc5d3da3 100644 --- a/plugin-defaults.ini +++ b/plugin-defaults.ini @@ -395,6 +395,8 @@ cover_exclusion_regexp:/images/.*?ribbon.gif #username:YourName #password:yourpassword +[thehexfiles.net] + [thequidditchpitch.org] ## Some sites require login (or login for some rated stories) The ## program can prompt you, or you can save it in config. In From ffbf5ec3b597e2372bcbef5582b2b114000f6524 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 25 May 2012 17:04:24 -0500 Subject: [PATCH 477/482] Added tag calibre-plugin-1.5.24 for changeset 6ac620498226 From ae464a718afbfce606f0f2cf6e2596f7a4b0a896 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 25 May 2012 17:04:50 -0500 Subject: [PATCH 478/482] Added tag FanFictionDownLoader-4.4.12 for changeset 6ac620498226 From 617ba5a1494caa412b160bf9210d9939b45cbac7 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 27 May 2012 15:46:08 -0500 Subject: [PATCH 479/482] Fix ffnet genre, characters, status. --- calibre-plugin/__init__.py | 2 +- .../adapters/adapter_fanfictionnet.py | 24 ++++++++++++------- index.html | 4 ++-- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py index bdc4f3b9..bf63bbaf 100644 --- a/calibre-plugin/__init__.py +++ b/calibre-plugin/__init__.py @@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase): description = 'UI plugin to download FanFiction stories from various sites.' supported_platforms = ['windows', 'osx', 'linux'] author = 'Jim Miller' - version = (1, 5, 24) + version = (1, 5, 25) minimum_calibre_version = (0, 8, 30) #: This field defines the GUI plugin class that contains all the code diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py index ee0d59cf..80126d7d 100644 --- a/fanficdownloader/adapters/adapter_fanfictionnet.py +++ b/fanficdownloader/adapters/adapter_fanfictionnet.py @@ -205,21 +205,29 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): if metatext == None: # indicates there's no Reviews, look for id: instead. metatext = a.findNext(text=re.compile(r' - id:')) + m = re.match(r" - (?P[^ ]+)",metatext) + if m.group('lang') != None: + self.story.setMetadata('language',m.group('lang')) + # after Rating, the same bit of text containing id:123456 contains # Complete--if completed. - if 'Complete' in a.findNext(text=re.compile(r'id:'+self.story.getMetadata('storyId'))): + if 'Complete' in soup.find(text=re.compile(r'id:'+self.story.getMetadata('storyId'))): self.story.setMetadata('status', 'Completed') else: self.story.setMetadata('status', 'In-Progress') + # Parse genre(s) from # # (fp) # # - m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) (?:- (?P.*?) )?(?:crossover )?(?:fan)?fiction(?P[ ]+with characters)?", + # + # Chapter 1 of a SpongeBob SquarePants - Romance/Humor fanfiction with characters SpongeBob. Bob Esponja tiene un admirador secreto ¿quien será?. update existing id:1684 + m = re.match(r"^(?:Chapter \d+ of a|A) (?:.*?) ?(?:- (?P.*?) )?(?:crossover )?(?:fan)?fiction(?P[ ]+with characters)?", soup.find('meta',{'name':'description'})['content']) + #print("meta desc:%s"%soup.find('meta',{'name':'description'})['content']) if m != None: genres=m.group('genres') if genres != None: @@ -238,15 +246,13 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter): # - English - Shinji H. 
- Updated: 01-13-12 - Published: 12-20-11 - id:7654123 # - English - Adventure/Angst - Ichigo K. & Neliel T. O./Nel - Reviews: # - English - Humor/Adventure - Harry P. & Ironhide - Reviews: - mc = re.match(r" - (?P[^ ]+ - )(?P[^ ]+ - )? (?P.+?) - (Reviews|Updated|Published)", + # - Spanish - Romance/Humor - SpongeBob - Reviews: + #print("metatext:%s"%metatext) + mc = re.match(r" - (?P[^ ]+ - )(?P[^ ]+ - )? ?(?P.+?) - (Reviews|Updated|Published)", metatext) chars = mc.group("chars") - for c in chars.split(' & '): - self.story.addToList('characters',c) - m = re.match(r" - (?P[^ ]+)",metatext) - if m.group('lang') != None: - self.story.setMetadata('language',m.group('lang')) - + for c in chars.split('&'): + self.story.addToList('characters',c.strip()) return def getChapterText(self, url): diff --git a/index.html b/index.html index 41e9a9e8..46b2faca 100644 --- a/index.html +++ b/index.html @@ -54,9 +54,9 @@ much easier.

      -

      New Site

      +

      Fix for fanfiction.net Genre, Characters and Status

      - Now supporting thehexfiles.net, which brings us up to an even 40 supported sites. Thanks again, Ida. + fanfiction.net changed their format yesterday or today (May 26/27), enough to break capture of Genre, Characters and Status. This version fixes it.
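      For anyone curious what changed under the hood: the adapter now parses the character/genre block with a named-group regex. It can be sanity-checked against one of the sample strings quoted in the adapter's own comments:

          import re

          metatext = " - English - Adventure/Angst - Ichigo K. & Neliel T. O./Nel - Reviews:"
          mc = re.match(r" - (?P<lang>[^ ]+ - )(?P<genres>[^ ]+ - )? ?(?P<chars>.+?) - (Reviews|Updated|Published)",
                        metatext)
          print(mc.group('lang'))    # 'English - '
          print(mc.group('genres'))  # 'Adventure/Angst - '
          print([c.strip() for c in mc.group('chars').split('&')])
          # ['Ichigo K.', 'Neliel T. O./Nel']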

      Questions? Check out our From fb3b0f82dd5d8d58cdb29b0f44516f5339726849 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 27 May 2012 15:46:34 -0500 Subject: [PATCH 480/482] Added tag calibre-plugin-1.5.25 for changeset 6c79e16c256c From 536e5cf4b99e21bbff17244f4b8303827f159cf7 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sun, 27 May 2012 15:46:50 -0500 Subject: [PATCH 481/482] Added tag FanFictionDownLoader-4.4.12a for changeset 6c79e16c256c From b39ba3b1069d848f0c7c5fb4daa21d43e5b8c941 Mon Sep 17 00:00:00 2001 From: Ida Date: Tue, 29 May 2012 21:14:52 -0400 Subject: [PATCH 482/482] First version of the adapter for the dokuga.com --- .../adapters/adapter_dokugacom.py | 218 ++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 fanficdownloader/adapters/adapter_dokugacom.py diff --git a/fanficdownloader/adapters/adapter_dokugacom.py b/fanficdownloader/adapters/adapter_dokugacom.py new file mode 100644 index 00000000..758aa1fa --- /dev/null +++ b/fanficdownloader/adapters/adapter_dokugacom.py @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import logging +import re +import urllib2 + +from .. import BeautifulSoup as bs +from ..htmlcleanup import stripHTML +from .. import exceptions as exceptions + +from base_adapter import BaseSiteAdapter, makeDate + +def getClass(): + return DokugaComAdapter + +# Class name has to be unique. Our convention is camel case the +# sitename with Adapter at the end. www is skipped. +class DokugaComAdapter(BaseSiteAdapter): # XXX + + def __init__(self, config, url): + BaseSiteAdapter.__init__(self, config, url) + + self.decode = ["Windows-1252", + "utf8"] # 1252 is a superset of iso-8859-1. + # Most sites that claim to be + # iso-8859-1 (and some that claim to be + # utf8) are really windows-1252. + self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + self.password = "" + self.is_adult=False + + # get storyId from url--url validation guarantees query is only sid=1234 + self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[3]) + logging.debug("storyId: (%s)"%self.story.getMetadata('storyId')) + self.story.setMetadata('section',self.parsedUrl.path.split('/',)[1]) + + # normalized story URL. + self._setURL('http://' + self.getSiteDomain() + '/'+self.parsedUrl.path.split('/',)[1]+'/story/'+self.story.getMetadata('storyId')) + + # Each adapter needs to have a unique site abbreviation. + self.story.setMetadata('siteabbrev','dkg') + + # If all stories from the site fall into the same category, + # the site itself isn't likely to label them as such, so we + # do. + self.story.addToList("category","InuYasha") + + # The date format will vary from site to site. 
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + if 'fanfiction' in self.story.getMetadata('section'): + self.dateformat = "%d %b %Y" + else: + self.dateformat = "%m-%d-%y" + + @staticmethod # must be @staticmethod, don't remove it. + def getSiteDomain(): + # The site domain. Does have www here, if it uses it. + return 'www.dokuga.com' + + def getSiteExampleURLs(self): + return "http://"+self.getSiteDomain()+"/fanfiction/story/1234/1 http://"+self.getSiteDomain()+"/spark/story/1234/1" + + def getSiteURLPattern(self): + return r"http://"+self.getSiteDomain()+"/(fanfiction|spark)?/story/\d+/?\d+?$" + + + ## Getting the chapter list and the meta data, plus 'is adult' checking. + def extractChapterUrlsAndMetadata(self): + + # index=1 makes sure we see the story chapter index. Some + # sites skip that for one-chapter stories. + url = self.url + logging.debug("URL: "+url) + + try: + data = self._fetchUrl(url) + except urllib2.HTTPError, e: + if e.code == 404: + raise exceptions.StoryDoesNotExist(self.url) + else: + raise e + + if "Access denied. This story has not been validated by the adminstrators of this site." in data: + raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") + + # use BeautifulSoup HTML parser to make everything easier to find. + soup = bs.BeautifulSoup(data) + # print data + + # Now go hunting for all the meta data and the chapter list. + + ## Title and author + a = soup.find('div', {'align' : 'center'}).find('h3') + + # Find authorid and URL from... author url. + aut = a.find('a') + self.story.setMetadata('authorId',aut['href'].split('=')[1]) + alink='http://'+self.host+aut['href'] + self.story.setMetadata('authorUrl','http://'+self.host+aut['href']) + self.story.setMetadata('author',aut.string) + aut.extract() + + a = a.string[:(len(a.string)-4)] + self.story.setMetadata('title',a) + + # Find the chapters: + chapters = soup.find('select').findAll('option') + if len(chapters)==1: + self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/'+self.story.getMetadata('section')+'/story/'+self.story.getMetadata('storyId')+'/1')) + else: + for chapter in chapters: + # just in case there's tags, like in chapter titles. 
/fanfiction/story/7406/1 + self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+self.story.getMetadata('section')+'/story/'+self.story.getMetadata('storyId')+'/'+chapter['value'])) + + self.story.setMetadata('numChapters',len(self.chapterUrls)) + + asoup = bs.BeautifulSoup(self._fetchUrl(alink)) + + if 'fanfiction' in self.story.getMetadata('section'): + asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div') + + #grab the rest of the metadata from the author's page + for div in asoup.findAll('div'): + nav=div.find('a', href=re.compile(r'/fanfiction/story/'+self.story.getMetadata('storyId')+"/1$")) + if nav != None: + break + div=div.nextSibling + self.setDescription(url,div) + + div=div.nextSibling + self.story.setMetadata('rating', div.text.split('Rating: ')[1].split('&')[0]) + + iscomp=div.text.split('Status: ')[1].split('&')[0] + if 'Complete' in iscomp: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + self.story.addToList('category', div.text.split('Category: ')[1].split('&')[0]) + self.story.addToList('category', 'Fanfiction') + + self.story.setMetadata('datePublished', makeDate(stripHTML(div.text.split('Created: ')[1].split('&')[0]), self.dateformat)) + + self.story.setMetadata('dateUpdated', makeDate(stripHTML(div.text.split('Updated: ')[1]), self.dateformat)) + + div=div.nextSibling.nextSibling + self.story.setMetadata('numWords', div.text.split('Words: ')[1].split('&')[0]) + + for genre in div.text.split('Genre: ')[1].split('&')[0].split(', '): + self.story.addToList('genre',genre) + + else: + asoup=asoup.find('div', {'id' : 'maincol'}).find('div', {'class' : 'padding'}) + for div in asoup.findAll('div'): + nav=div.find('a', href=re.compile(r'/spark/story/'+self.story.getMetadata('storyId')+"/1$")) + if nav != None: + break + + div=div.nextSibling.nextSibling + self.setDescription(url,div) + self.story.addToList('category', 'Spark') + + div=div.nextSibling.nextSibling + self.story.setMetadata('rating', div.text.split('Rating: ')[1].split(' - ')[0]) + + iscomp=div.text.split('Status: ')[1].split(' - ')[0] + if 'Complete' in iscomp: + self.story.setMetadata('status', 'Completed') + else: + self.story.setMetadata('status', 'In-Progress') + + for genre in div.text.split('Genre: ')[1].split(' - ')[0].split('/'): + self.story.addToList('genre',genre) + + div=div.nextSibling.nextSibling + + date=div.text.split('Updated: ')[1].split(' -')[0] + self.story.setMetadata('dateUpdated', makeDate(date, self.dateformat)) + + # does not have published date anywhere + self.story.setMetadata('datePublished', makeDate(date, self.dateformat)) + + self.story.setMetadata('numWords', div.text.split('Words ')[1]) + + + + + + # grab the text for an individual chapter. + def getChapterText(self, url): + + logging.debug('Getting chapter text from: %s' % url) + + soup = bs.BeautifulStoneSoup(self._fetchUrl(url), + selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. + + div = soup.find('div', {'id' : 'chtext'}) + + if None == div: + raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) + + return self.utf8FromSoup(url,div)
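A closing note on the two dokuga.com date formats above: makeDate is imported from base_adapter and, judging by its call sites, wraps datetime.strptime. Under that assumption, and with made-up dates, the two section formats parse like this:

    from datetime import datetime

    # 'fanfiction' section dates look like '29 May 2012'
    print(datetime.strptime('29 May 2012', '%d %b %Y'))  # -> 2012-05-29 00:00:00
    # 'spark' section dates look like '05-29-12'
    print(datetime.strptime('05-29-12', '%m-%d-%y'))     # -> 2012-05-29 00:00:00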