diff --git a/fanficfare/exceptions.py b/fanficfare/exceptions.py index 876711d5..c68b22ac 100644 --- a/fanficfare/exceptions.py +++ b/fanficfare/exceptions.py @@ -24,6 +24,13 @@ class FailedToDownload(Exception): def __str__(self): return self.error +class RejectImage(Exception): + def __init__(self,error): + self.error=error + + def __str__(self): + return self.error + class InvalidStoryURL(Exception): def __init__(self,url,domain,example): self.url=url diff --git a/fanficfare/story.py b/fanficfare/story.py index 9d78fd86..84f35894 100644 --- a/fanficfare/story.py +++ b/fanficfare/story.py @@ -44,6 +44,8 @@ imagetypes = { try: from calibre.utils.magick import Image + from StringIO import StringIO + from gif import GifInfo, CHECK_IS_ANIMATED convtype = {'jpg':'JPG', 'png':'PNG'} def convert_image(url,data,sizes,grayscale, @@ -55,6 +57,10 @@ try: owidth, oheight = img.size nwidth, nheight = sizes scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight) + + if normalize_format_name(img.format)=="gif" and GifInfo(StringIO(data),CHECK_IS_ANIMATED).frameCount > 1: + raise exceptions.RejectImage("Animated gifs come out purely--not going to use it.") + if scaled: img.size = (nwidth, nheight) export = True @@ -224,7 +230,7 @@ langs = { "Devanagari":"hi", ## These are from/for AO3: - + u'العربية':'ar', u'беларуская':'be', u'Български език':'bg', @@ -325,7 +331,7 @@ class InExMatch: else: retval = self.match == value #print(">>>>>>>>>>>>>%s==%s r: %s,%s=%s"%(self.match,value,self.negate,retval, self.negate != retval)) - + return self.negate != retval def __str__(self): @@ -338,7 +344,7 @@ class InExMatch: else: s='=' return u'InExMatch(%s %s%s %s)'%(self.keys,f,s,self.match) - + ## metakey[,metakey]=~pattern ## metakey[,metakey]==string ## *for* part lines. Effect only when trailing conditional key=~regexp matches @@ -358,7 +364,7 @@ def set_in_ex_clude(setting): match = InExMatch(line) dest.append([match,condmatch]) return dest - + ## Two or three part lines. 
Two part effect everything. ## Three part effect only those key(s) lists. ## pattern=>replacement @@ -433,7 +439,7 @@ class Story(Configurable): def join_list(self, key, vallist): return self.getConfig("join_string_"+key,u", ").replace(SPACE_REPLACE,' ').join(map(unicode, vallist)) - + def setMetadata(self, key, value, condremoveentities=True): # keep as list type, but set as only value. @@ -445,20 +451,20 @@ class Story(Configurable): self.metadata[key]=conditionalRemoveEntities(value) else: self.metadata[key]=value - + if key == "language": try: # getMetadata not just self.metadata[] to do replace_metadata. self.setMetadata('langcode',langs[self.getMetadata(key)]) except: self.setMetadata('langcode','en') - + if key == 'dateUpdated' and value: # Last Update tags for Bill. self.addToList('lastupdate',value.strftime("Last Update Year/Month: %Y/%m")) self.addToList('lastupdate',value.strftime("Last Update: %Y/%m/%d")) - + def do_in_ex_clude(self,which,value,key): if value and which in self.in_ex_cludes: include = 'include' in which @@ -487,7 +493,7 @@ class Story(Configurable): if include and keyfound and not found: value = None return value - + def doReplacements(self,value,key,return_list=False,seen_list=[]): value = self.do_in_ex_clude('include_metadata_pre',value,key) @@ -526,7 +532,7 @@ class Story(Configurable): # print("replacement,value:%s,%s->%s"%(replacement,value,regexp.sub(replacement,value))) value = regexp.sub(replacement,value) retlist = [value] - + for val in retlist: retlist = map(partial(self.do_in_ex_clude,'include_metadata_post',key=key),retlist) retlist = map(partial(self.do_in_ex_clude,'exclude_metadata_post',key=key),retlist) @@ -610,7 +616,7 @@ class Story(Configurable): self.getMetadata('author', removeallentities, doreplacements))) self.extendList("extratags",self.getConfigList("extratags")) - + if self.getMetadataRaw('seriesUrl'): self.setMetadata('seriesHTML',linkhtml%('series',self.getMetadata('seriesUrl', removeallentities, 
doreplacements), self.getMetadata('series', removeallentities, doreplacements))) @@ -703,10 +709,10 @@ class Story(Configurable): for val in retlist: newretlist.extend(self.doReplacements(val,listname,return_list=True)) retlist = newretlist - + if removeallentities: retlist = map(removeAllEntities,retlist) - + retlist = filter( lambda x : x!=None and x!='' ,retlist) # reorder ships so b/a and c/b/a become a/b and a/b/c. Only on '/', diff --git a/included_dependencies/gif.py b/included_dependencies/gif.py new file mode 100644 index 00000000..406bab2c --- /dev/null +++ b/included_dependencies/gif.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +A pure Python GIF metadata extractor. +Supports adjustable detail to fine-tune performance. + +Example code and full epydoc docstrings included. + +Uses: + - Identifying whether a GIF is static or animated. + - Extracting the dimensions, pixel aspect ratio, number of frames, loop count, + global palette or palette size, and background color. + - Extracting comments and other plaintext. + - Testing for various structural errors. + +TODO: + - Provide basic support for XMP Metadata extraction + - http://en.wikipedia.org/wiki/Extensible_Metadata_Platform#Location_in_file_types + - http://www.matthewflickinger.com/lab/whatsinagif/bits_and_bytes.asp + - Generate test GIF with http://code.google.com/p/python-xmp-toolkit/ + +Changelog: + - 0.2.2: Audited the code and made some corrections. + - 0.2.1: 40% speed improvement (went from 15 to 9 seconds for 1000 images) + - 0.2.0: Feature-complete + - 0.1.0: Initial release +""" + +__appname__ = "gif.py" +__author__ = "Stephan Sokolow (deitarion/SSokolow)" +__version__ = "0.2.2" +__license__ = "PSF License 2.4 or higher (The Python License)" + +#{ Check Types (enum, numerical ordering is significant) +CHECK_IS_GIF_FILE = 0 #: Just check for a valid GIF header. +CHECK_IS_ANIMATED = 1 #: Check whether the file has more than one frame. 
+CHECK_COUNT_FRAMES = 2 #: Count the number of frames in the file. +CHECK_PARSE_PALETTE = 3 #: Parse the palette and resolve the background color. +CHECK_READ_COMMENTS = 4 #: Load comments (can sometimes be large) into the L{GifInfo} object. +CHECK_READ_ALL_TEXT = 5 #: Also load the contents of Plain Text extension blocks. + +CHECK_ALL = CHECK_READ_ALL_TEXT #: alias to allow for future modifications + +#{ Warning Codes (bitfield) +WARN_NONE = 0 #: No warnings +WARN_BAD_IMG = 1 #: Corruption (of the [sub]block size field(s)) or truncation detected in an image block +WARN_BAD_EXT = 2 #: Corruption (of the [sub]block size field(s)) or truncation detected in an extension block +WARN_BAD_SIZE = 4 #: An image block specifies dimensions exceeding the global canvas size +WARN_BAD_BGCOLOR = 8 #: The background color index specified is greater than the palette size +WARN_EOF = 16 #: File is missing its trailer. (Corrupt elsewhere, truncated, or breaking spec by using EOF as the terminator.) +WARN_TRUNC = 32 #: File is definitively either truncated or corrupt. (An EOF was encountered part-way through a structure.) +WARN_LOOP_POS = 64 #: Netscape Application Extension block (animation-control) was present but not first in the file. 
+#} + +import struct + +#{ Structures used by GifInfo +gifHeaderStruct = struct.Struct('= CHECK_PARSE_PALETTE: + self.palette = [] + for pos in range(0, self.paletteSize): + self.palette.append(gifColorTripleStruct.unpack_from(rawPalette, pos * 3)) + + if self.paletteSize and bgColor > self.paletteSize: + self.warnFlags = self.warnFlags | WARN_BAD_BGCOLOR + elif self.palette: + self.bgColor = self.palette[bgColor] + + # Iterate blocks + self.firstBlock = True + blocktype = self._read(fh, 1) + while not blocktype == chr(0x3B) and not self.warnFlags & WARN_EOF: + self._blockHandlers.get(blocktype, lambda x, y:'')(self, fh) + if self.checkLevel <= CHECK_IS_ANIMATED and self.frameCount > 1: + return + + self.firstBlock = False + blocktype = self._read(fh, 1) + + del self.firstBlock + + def _handleImageBlock(self, fh): + """""" + self.frameCount += 1 + try: + x, y, w, h, LCTF_Byte = gifImageStruct.unpack(self._read(fh, gifImageStruct.size)) + except: + self.warnFlags = self.warnFlags | WARN_EOF | WARN_TRUNC + return + + if x + w > self.width or y + h > self.height: + self.warnFlags = self.warnFlags | WARN_BAD_SIZE + + self._getPalette(fh, LCTF_Byte) # Skip the local color table if present + fh.read(1) # Skip the LZW minimum code size. + + # Skip content and test for the block terminator + if not self._skipSubBlocks(fh): # For example, if it's a zero-length string like EOF would return. 
+ self.warnFlags = self.warnFlags | WARN_BAD_IMG + + def _handleGenericExtensionBlock(self, fh): + """@todo: Rewrite this so extension block types have method handlers.""" + try: + extType, blkSize = gifExtenStruct.unpack(self._read(fh, gifExtenStruct.size)) + except: + self.warnFlags = self.warnFlags | WARN_EOF | WARN_TRUNC + return + startOffset = fh.tell() + + if extType == 0x01 and self.checkLevel >= CHECK_READ_ALL_TEXT: # Plain Text Block + self._read(fh, gifPlaintextStruct.size) + self.otherText = self.otherText or [] + blkSize = self._read(fh, 1) + while blkSize and blkSize != '\x00': + self.otherText.append(self._read(fh, ord(blkSize))) + blkSize = self._read(fh, 1) + elif extType == 0xFE and self.checkLevel >= CHECK_READ_COMMENTS: # Comment Block + self.comments = self.comments or [] + blkSize = self._read(fh, 1) + while blkSize and blkSize != '\x00': + self.comments.append(self._read(fh, ord(blkSize))) + blkSize = self._read(fh, 1) + elif extType == 0xFF: # Application Block + if blkSize == 0x0B and self._read(fh, blkSize) == "NETSCAPE2.0": + try: + a, b, self.loopCount = gifNetscapeStruct.unpack(self._read(fh, gifNetscapeStruct.size)) + except: + self.warnFlags = self.warnFlags | WARN_EOF | WARN_TRUNC + return + + if a != 3 and b != 1: + self.warnFlags = self.warnFlags | WARN_BAD_EXT + + if not self.firstBlock: + self.warnFlags = self.warnFlags | WARN_LOOP_POS + else: + fh.seek( startOffset + blkSize ) # Skip the contents + + # Test for the block terminator + if not self._skipSubBlocks(fh): + self.warnFlags = self.warnFlags | WARN_BAD_EXT + + def _getPalette(self, handle, bitfield): + """Using the size value from C{bitfield}, + load the palette at C{handle}'s current file pointer position.""" + if bitfield & int("10000000", 2): + nBits = bitfield & int("00000111", 2) + tableSize = 3 * 2**( nBits + 1 ) + return handle.read(tableSize) + else: + return '' + + def _read(self, handle, size): + """Attempt to read the specified number of bytes. 
Set L{WARN_EOF} if + fewer are received.""" + content = handle.read(size) + if len(content) < size: + self.warnFlags = self.warnFlags | WARN_EOF + return content + + def _skipSubBlocks(self, handle): + """Skip sub-blocks beginning at the current file pointer position + using fseek.""" + offset = handle.tell() + blkSize = handle.read(1) + while blkSize and blkSize != '\x00': + offset += ord(blkSize) + 1 + handle.seek(offset) + blkSize = handle.read(1) + return blkSize + + _blockHandlers = { + chr(0x2C) : _handleImageBlock, + chr(0x21) : _handleGenericExtensionBlock, + } + +def gif_is_animated(path): + """A simple convenience function for testing whether a GIF is animated. + @rtype: C{bool} + """ + return GifInfo(file(path,'rb'), CHECK_IS_ANIMATED).frameCount > 1 + +if __name__ == '__main__': + from optparse import OptionParser + parser = OptionParser(description=__doc__.split('\n\n')[0], + version="%%prog v%s" % __version__, usage="%prog ...") + + opts, args = parser.parse_args() + + if args: + for fpath in args: + try: + info = GifInfo(fpath, CHECK_COUNT_FRAMES) + warnFlags = ( + (info.warnFlags & WARN_BAD_IMG and 'I' or ' ') + + (info.warnFlags & WARN_BAD_EXT and 'X' or ' ') + + (info.warnFlags & WARN_BAD_SIZE and 'C' or ' ') + + (info.warnFlags & WARN_BAD_BGCOLOR and 'B' or ' ') + + (info.warnFlags & WARN_EOF and 'E' or ' ') + + (info.warnFlags & WARN_TRUNC and 'T' or ' ') + + (info.warnFlags & WARN_LOOP_POS and 'L' or ' ') + ) + print "[%s](%3s Frames): %s" % (warnFlags, info.frameCount, info.path) + except BadHeaderException, err: + print "%s: %s" % (str(err), fpath) + print "\nWarning Flags:" + print " I = Image Chunk Corruption/Truncation" + print " X = Extension Chunk Corruption/Truncation" + print " C = Image Chunk Dimensions Exceed Global Canvas" + print " B = Bad Background Color (Index Exceeds Palette Size)" + print " E = Unexpected EOF Encountered (Missing Image Terminator)" + print " T = EOF Encountered Within A Block Header (Corrupt or Truncated 
File)" + print " L = Loop-control block misplaced within the file" + print + print "Note: A nearly-threefold speed-up can be had by using CHECK_IS_ANIMATED rather than CHECK_COUNT_FRAMES" diff --git a/makeplugin.py b/makeplugin.py index 56c8efb1..9350d6a8 100644 --- a/makeplugin.py +++ b/makeplugin.py @@ -36,7 +36,7 @@ if __name__=="__main__": os.chdir('../included_dependencies') # 'a' for append - files=['six.py','bs4','html5lib','chardet'] + files=['gif.py','six.py','bs4','html5lib','chardet'] createZipFile("../"+filename,"a", files, exclude=exclude)