diff --git a/recipes/hvg.recipe b/recipes/hvg.recipe new file mode 100644 index 0000000000..8e9218d9c3 --- /dev/null +++ b/recipes/hvg.recipe @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class HVG(BasicNewsRecipe): + title = 'HVG.HU' + __author__ = u'István Papp' + description = u'Friss hírek a HVG-től' + timefmt = ' [%Y. %b. %d., %a.]' + oldest_article = 4 + language = 'hu' + + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf8' + publisher = 'HVG Online' + category = u'news, hírek, hvg' + extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] + remove_tags_before = dict(id='pg-content') + remove_javascript = True + remove_empty_feeds = True + + feeds = [ + (u'Itthon', u'http://hvg.hu/rss/itthon') + ,(u'Világ', u'http://hvg.hu/rss/vilag') + ,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag') + ,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany') + ,(u'Panoráma', u'http://hvg.hu/rss/Panorama') + ,(u'Karrier', u'http://hvg.hu/rss/karrier') + ,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia') + ,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek') + ,(u'Kultúra', u'http://hvg.hu/rss/kultura') + ,(u'Cégautó', u'http://hvg.hu/rss/cegauto') + ,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv') + ,(u'Egészség', u'http://hvg.hu/rss/egeszseg') + ,(u'Vélemény', u'http://hvg.hu/rss/velemeny') + ,(u'Sport', u'http://hvg.hu/rss/sport') + ] + + def print_version(self, url): + return url.replace ('#rss', '/print') + diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe index 28ee35802a..7363062346 100644 --- a/recipes/lwn_weekly.recipe +++ b/recipes/lwn_weekly.recipe @@ -23,6 +23,11 @@ class 
WeeklyLWN(BasicNewsRecipe): remove_tags_after = dict(attrs={'class':'ArticleText'}) remove_tags = [dict(name=['h2', 'form'])] + preprocess_regexps = [ + # Remove the
and "Log in to post comments" + (re.compile(r'> sys.stderr, e parser.print_help() return 1 @@ -291,13 +291,13 @@ def main(): elif args[1].startswith("prs500:"): try: infile = open(args[0], "rb") - except IOError, e: + except IOError as e: print >> sys.stderr, e parser.print_help() return 1 try: dev.put_file(infile, args[1][7:]) - except PathError, err: + except PathError as err: if options.force and 'exists' in str(err): dev.del_file(err.path, False) dev.put_file(infile, args[1][7:]) @@ -355,7 +355,7 @@ def main(): return 1 except DeviceLocked: print >> sys.stderr, "The device is locked. Use the --unlock option" - except (ArgumentError, DeviceError), e: + except (ArgumentError, DeviceError) as e: print >>sys.stderr, e return 1 return 0 diff --git a/src/calibre/devices/prs500/driver.py b/src/calibre/devices/prs500/driver.py index 65ecc98a81..aaba094fb3 100644 --- a/src/calibre/devices/prs500/driver.py +++ b/src/calibre/devices/prs500/driver.py @@ -177,7 +177,7 @@ def run_session(*args, **kwargs): dev.send_validated_command(BeginEndSession(end=True)) dev.in_session = False raise - except USBError, err: + except USBError as err: if "No such device" in str(err): raise DeviceError() elif "Connection timed out" in str(err): @@ -272,7 +272,7 @@ def open(self, library_uuid) : self.bulk_read_max_packet_size = red.MaxPacketSize self.bulk_write_max_packet_size = wed.MaxPacketSize self.handle.claim_interface(self.INTERFACE_ID) - except USBError, err: + except USBError as err: raise DeviceBusy(str(err)) # Large timeout as device may still be initializing res = self.send_validated_command(GetUSBProtocolVersion(), timeout=20000) @@ -303,7 +303,7 @@ def close(self): try: self.handle.reset() self.handle.release_interface(self.INTERFACE_ID) - except Exception, err: + except Exception as err: print >> sys.stderr, err self.handle, self.device = None, None self.in_session = False @@ -509,7 +509,7 @@ def get_file(self, path, outfile, end_session=True): outfile.write("".join(map(chr, 
packets[0][16:]))) for i in range(1, len(packets)): outfile.write("".join(map(chr, packets[i]))) - except IOError, err: + except IOError as err: self.send_validated_command(FileClose(_id)) raise ArgumentError("File get operation failed. " + \ "Could not write to local location: " + str(err)) @@ -656,7 +656,7 @@ def _exists(self, path): dest = None try: dest = self.path_properties(path, end_session=False) - except PathError, err: + except PathError as err: if "does not exist" in str(err) or "not mounted" in str(err): return (False, None) else: raise diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 37b2b061e5..c46e9539c9 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -124,11 +124,11 @@ def _windows_space(cls, prefix): if not prefix: return 0, 0 prefix = prefix[:-1] - win32file = __import__('win32file', globals(), locals(), [], -1) + import win32file try: sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \ win32file.GetDiskFreeSpace(prefix) - except Exception, err: + except Exception as err: if getattr(err, 'args', [None])[0] == 21: # Disk not ready time.sleep(3) sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \ @@ -771,7 +771,7 @@ def eject_linux(self): for d in drives: try: eject(d) - except Exception, e: + except Exception as e: print 'Udisks eject call for:', d, 'failed:' print '\t', e failures = True diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index c5bac936b5..7776be5e28 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -57,7 +57,7 @@ def __call__(self, ok): buf.open(QBuffer.WriteOnly) image.save(buf, 'JPEG') self.data = str(ba.data()) - except Exception, e: + except Exception as e: self.exception = e self.traceback = traceback.format_exc() finally: diff --git a/src/calibre/ebooks/epub/fix/container.py b/src/calibre/ebooks/epub/fix/container.py index 
539d886312..1669290a7b 100644 --- a/src/calibre/ebooks/epub/fix/container.py +++ b/src/calibre/ebooks/epub/fix/container.py @@ -151,7 +151,7 @@ def get(self, name): if name in self.mime_map: try: raw = self._parse(raw, self.mime_map[name]) - except XMLSyntaxError, err: + except XMLSyntaxError as err: raise ParseError(name, unicode(err)) self.cache[name] = raw return raw diff --git a/src/calibre/ebooks/epub/fix/main.py b/src/calibre/ebooks/epub/fix/main.py index fbfe80551d..e4c1a60a77 100644 --- a/src/calibre/ebooks/epub/fix/main.py +++ b/src/calibre/ebooks/epub/fix/main.py @@ -54,7 +54,7 @@ def main(args=sys.argv): epub = os.path.abspath(args[1]) try: run(epub, opts, default_log) - except ParseError, err: + except ParseError as err: default_log.error(unicode(err)) raise SystemExit(1) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 1599d3c896..dd0a247a67 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -110,7 +110,7 @@ def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None): try: with open(self.path, 'rb') as f: src = f.read() - except IOError, err: + except IOError as err: msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err)) if level == 0: raise IOError(msg) @@ -202,7 +202,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None) raise IgnoreFile('%s is a binary file'%nf.path, -1) nl.append(nf) flat.append(nf) - except IgnoreFile, err: + except IgnoreFile as err: rejects.append(link) if not err.doesnt_exist or verbose > 1: print repr(err) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 3be8f85e45..4ee1538e3f 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -332,7 +332,7 @@ def preprocess(self, raw): soup = BeautifulSoup(raw, convertEntities=BeautifulSoup.XHTML_ENTITIES, 
markupMassage=nmassage) - except ConversionError, err: + except ConversionError as err: if 'Failed to coerce to unicode' in str(err): raw = unicode(raw, 'utf8', 'replace') soup = BeautifulSoup(raw, @@ -935,7 +935,7 @@ def detect_encoding(im): try: im = PILImage.open(path) - except IOError, err: + except IOError as err: self.log.warning('Unable to process image: %s\n%s'%( original_path, err)) return encoding = detect_encoding(im) @@ -953,7 +953,7 @@ def scale_image(width, height): pt.close() self.scaled_images[path] = pt return pt.name - except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well a some GIF images + except (IOError, SystemError) as err: # PIL chokes on interlaced PNG images as well a some GIF images self.log.warning(_('Unable to process image %s. Error: %s')%(path, err)) if width == None or height == None: @@ -1013,7 +1013,7 @@ def scale_image(width, height): if not self.images.has_key(path): try: self.images[path] = ImageStream(path, encoding=encoding) - except LrsError, err: + except LrsError as err: self.log.warning(_('Could not process image: %s\n%s')%( original_path, err)) return @@ -1768,7 +1768,7 @@ def parse_tag(self, tag, parent_css): tag_css = self.tag_css(tag)[0] # Table should not inherit CSS try: self.process_table(tag, tag_css) - except Exception, err: + except Exception as err: self.log.warning(_('An error occurred while processing a table: %s. 
Ignoring table markup.')%repr(err)) self.log.exception('') self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300]) @@ -1858,7 +1858,7 @@ def process_file(path, options, logger): tf.close() tim.save(tf.name) tpath = tf.name - except IOError, err: # PIL sometimes fails, for example on interlaced PNG files + except IOError as err: # PIL sometimes fails, for example on interlaced PNG files logger.warn(_('Could not read cover image: %s'), err) options.cover = None else: diff --git a/src/calibre/ebooks/markdown/markdown.py b/src/calibre/ebooks/markdown/markdown.py index e734079116..677047878a 100644 --- a/src/calibre/ebooks/markdown/markdown.py +++ b/src/calibre/ebooks/markdown/markdown.py @@ -34,7 +34,7 @@ import re, sys, codecs from logging import getLogger, StreamHandler, Formatter, \ - DEBUG, INFO, WARN, ERROR, CRITICAL + DEBUG, INFO, WARN, CRITICAL MESSAGE_THRESHOLD = CRITICAL @@ -95,7 +95,7 @@ def removeBOM(text, encoding): # and uses the actual name of the executable called.) EXECUTABLE_NAME_FOR_USAGE = "python markdown.py" - + # --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ---------- @@ -242,8 +242,6 @@ def setBidi(self, bidi): if bidi: - orig_bidi = self.bidi - if not self.bidi or self.isDocumentElement: # Once the bidi is set don't change it (except for doc element) self.bidi = bidi @@ -319,7 +317,7 @@ def toxml(self): childBuffer += "/>" - + buffer += "<" + self.nodeName if self.nodeName in ['p', 'li', 'ul', 'ol', @@ -330,10 +328,10 @@ def toxml(self): bidi = self.bidi else: bidi = self.doc.bidi - + if bidi=="rtl": self.setAttribute("dir", "rtl") - + for attr in self.attributes: value = self.attribute_values[attr] value = self.doc.normalizeEntities(value, @@ -358,7 +356,7 @@ class TextNode: attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} def __init__ (self, text): - self.value = text + self.value = text def attributeCallback(self, match): @@ -372,7 +370,7 @@ def toxml(self): text = self.value self.parent.setBidi(getBidiType(text)) - + 
if not text.startswith(HTML_PLACEHOLDER_PREFIX): if self.parent.nodeName == "p": text = text.replace("\n", "\n ") @@ -413,11 +411,11 @@ def toxml(self): class TextPreprocessor: ''' TextPreprocessors are run before the text is broken into lines. - + Each TextPreprocessor implements a "run" method that takes a pointer to a text string of the document, modifies it as necessary and returns - either the same pointer or a pointer to a new string. - + either the same pointer or a pointer to a new string. + TextPreprocessors must extend markdown.TextPreprocessor. ''' @@ -431,18 +429,18 @@ class Preprocessor: Each preprocessor implements a "run" method that takes a pointer to a list of lines of the document, modifies it as necessary and returns - either the same pointer or a pointer to a new list. - + either the same pointer or a pointer to a new list. + Preprocessors must extend markdown.Preprocessor. ''' def run(self, lines): pass - + class HtmlBlockPreprocessor(TextPreprocessor): """Removes html blocks from the source text and stores it.""" - + def _get_left_tag(self, block): return block[1:].replace(">", " ", 1).split()[0].lower() @@ -451,7 +449,7 @@ def _get_right_tag(self, left_tag, block): return block.rstrip()[-len(left_tag)-2:-1].lower() def _equal_tags(self, left_tag, right_tag): - + if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc. 
return True if ("/" + left_tag) == right_tag: @@ -467,17 +465,17 @@ def _equal_tags(self, left_tag, right_tag): def _is_oneliner(self, tag): return (tag in ['hr', 'hr/']) - + def run(self, text): new_blocks = [] text = text.split("\n\n") - + items = [] left_tag = '' right_tag = '' in_tag = False # flag - + for block in text: if block.startswith("\n"): block = block[1:] @@ -485,7 +483,7 @@ def run(self, text): if not in_tag: if block.startswith("<"): - + left_tag = self._get_left_tag(block) right_tag = self._get_right_tag(left_tag, block) @@ -497,13 +495,13 @@ def run(self, text): if self._is_oneliner(left_tag): new_blocks.append(block.strip()) continue - + if block[1] == "!": # is a comment block left_tag = "--" right_tag = self._get_right_tag(left_tag, block) # keep checking conditions below and maybe just append - + if block.rstrip().endswith(">") \ and self._equal_tags(left_tag, right_tag): new_blocks.append( @@ -519,9 +517,9 @@ def run(self, text): else: items.append(block.strip()) - + right_tag = self._get_right_tag(left_tag, block) - + if self._equal_tags(left_tag, right_tag): # if find closing tag in_tag = False @@ -532,7 +530,7 @@ def run(self, text): if items: new_blocks.append(self.stash.store('\n\n'.join(items))) new_blocks.append('\n') - + return "\n\n".join(new_blocks) HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor() @@ -605,7 +603,7 @@ def _isLine(self, block): class ReferencePreprocessor(Preprocessor): - ''' + ''' Removes reference definitions from the text and stores them for later use. 
''' @@ -760,7 +758,7 @@ def handleMatch(self, m, doc): return el -class DoubleTagPattern (SimpleTagPattern): +class DoubleTagPattern (SimpleTagPattern): def handleMatch(self, m, doc): tag1, tag2 = self.tag.split(",") @@ -775,7 +773,6 @@ class HtmlPattern (Pattern): def handleMatch (self, m, doc): rawhtml = m.group(2) - inline = True place_holder = self.stash.store(rawhtml) return doc.createTextNode(place_holder) @@ -926,11 +923,11 @@ def handleMatch(self, m, doc): class Postprocessor: ''' Postprocessors are run before the dom it converted back into text. - + Each Postprocessor implements a "run" method that takes a pointer to a - NanoDom document, modifies it as necessary and returns a NanoDom + NanoDom document, modifies it as necessary and returns a NanoDom document. - + Postprocessors must extend markdown.Postprocessor. There are currently no standard post-processors, but the footnote @@ -945,10 +942,10 @@ def run(self, dom): class TextPostprocessor: ''' TextPostprocessors are run after the dom it converted back into text. - + Each TextPostprocessor implements a "run" method that takes a pointer to a text string, modifies it as necessary and returns a text string. - + TextPostprocessors must extend markdown.TextPostprocessor. ''' @@ -971,7 +968,7 @@ def run(self, text): html = '' else: html = HTML_REMOVED_TEXT - + text = text.replace("

%s\n

" % (HTML_PLACEHOLDER % i), html + "\n") text = text.replace(HTML_PLACEHOLDER % i, html) @@ -1031,7 +1028,6 @@ def _findHead(self, lines, fn, allowBlank=0): remainder of the original list""" items = [] - item = -1 i = 0 # to keep track of where we are @@ -1187,7 +1183,7 @@ def __init__(self, source=None, # depreciated RAWHTMLTEXTPOSTPROCESSOR] self.prePatterns = [] - + self.inlinePatterns = [DOUBLE_BACKTICK_PATTERN, BACKTICK_PATTERN, @@ -1241,7 +1237,7 @@ def registerExtensions(self, extensions, configs): configs_for_ext = configs[ext] else: configs_for_ext = [] - extension = module.makeExtension(configs_for_ext) + extension = module.makeExtension(configs_for_ext) extension.extendMarkdown(self, globals()) @@ -1310,7 +1306,7 @@ def _transform(self): else: buffer.append(line) self._processSection(self.top_element, buffer) - + #self._processSection(self.top_element, self.lines) # Not sure why I put this in but let's leave it for now. @@ -1426,7 +1422,7 @@ def _processParagraph(self, parent_elem, paragraph, inList, looseList): for item in list: el.appendChild(item) - + def _processUList(self, parent_elem, lines, inList): self._processList(parent_elem, lines, inList, @@ -1458,7 +1454,7 @@ def _processList(self, parent_elem, lines, inList, listexpr, tag): i = 0 # a counter to keep track of where we are - for line in lines: + for line in lines: loose = 0 if not line.strip(): @@ -1477,7 +1473,7 @@ def _processList(self, parent_elem, lines, inList, listexpr, tag): # Check if the next non-blank line is still a part of the list if ( RE.regExp['ul'].match(next) or - RE.regExp['ol'].match(next) or + RE.regExp['ol'].match(next) or RE.regExp['tabbed'].match(next) ): # get rid of any white space in the line items[item].append(line.strip()) @@ -1618,7 +1614,7 @@ def _handleInline (self, line, patternIndex=0): i = 0 while i < len(parts): - + x = parts[i] if isinstance(x, (str, unicode)): @@ -1641,14 +1637,14 @@ def _handleInline (self, line, patternIndex=0): parts[i] = 
self.doc.createTextNode(x) return parts - + def _applyPattern(self, line, pattern, patternIndex): """ Given a pattern name, this function checks if the line fits the pattern, creates the necessary elements, and returns back a list consisting of NanoDom elements and/or strings. - + @param line: the text to be processed @param pattern: the pattern to be checked @@ -1676,19 +1672,19 @@ def _applyPattern(self, line, pattern, patternIndex): if not node.nodeName in ["code", "pre"]: for child in node.childNodes: if isinstance(child, TextNode): - + result = self._handleInline(child.value, patternIndex+1) - + if result: if result == [child]: continue - + result.reverse() #to make insertion easier position = node.childNodes.index(child) - + node.removeChild(child) for item in result: @@ -1699,7 +1695,7 @@ def _applyPattern(self, line, pattern, patternIndex): self.doc.createTextNode(item)) else: node.insertChild(position, item) - + @@ -1798,14 +1794,14 @@ def markdownFromFile(input = None, def markdown(text, extensions = [], safe_mode = False): - + message(DEBUG, "in markdown.markdown(), received text:\n%s" % text) extension_names = [] extension_configs = {} - + for ext in extensions: - pos = ext.find("(") + pos = ext.find("(") if pos == -1: extension_names.append(ext) else: @@ -1820,7 +1816,7 @@ def markdown(text, safe_mode = safe_mode) return md.convert(text) - + class Extension: @@ -1845,26 +1841,11 @@ def setConfig(self, key, value): For lower versions of Python use: %s INPUT_FILE > OUTPUT_FILE - + """ % EXECUTABLE_NAME_FOR_USAGE def parse_options(): - - try: - optparse = __import__("optparse") - except: - if len(sys.argv) == 2: - return {'input': sys.argv[1], - 'output': None, - 'message_threshold': CRITICAL, - 'safe': False, - 'extensions': [], - 'encoding': None } - - else: - print OPTPARSE_WARNING - return None - + import optparse parser = optparse.OptionParser(usage="%prog INPUTFILE [options]") parser.add_option("-f", "--file", dest="filename", @@ -1881,7 +1862,7 @@ 
def parse_options(): parser.add_option("-s", "--safe", dest="safe", default=False, metavar="SAFE_MODE", help="same mode ('replace', 'remove' or 'escape' user's HTML tag)") - + parser.add_option("--noisy", action="store_const", const=DEBUG, dest="verbose", help="print debug messages") @@ -1914,14 +1895,14 @@ def main(): if not options: sys.exit(0) - + markdownFromFile(**options) if __name__ == '__main__': sys.exit(main()) """ Run Markdown from the command line. """ - + diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index 8e4dd1dd27..4100439feb 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -108,7 +108,7 @@ def _get_cover_url(br, asin): q = 'http://amzn.com/'+asin try: raw = br.open_novisit(q).read() - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return None @@ -139,7 +139,7 @@ def get_metadata(br, asin, mi): q = 'http://amzn.com/'+asin try: raw = br.open_novisit(q).read() - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return False diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonfr.py index 156fff3d75..248c8d9ed0 100644 --- a/src/calibre/ebooks/metadata/amazonfr.py +++ b/src/calibre/ebooks/metadata/amazonfr.py @@ -33,7 +33,7 @@ def fetch(self): try: self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose, lang='fr') - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -50,7 +50,7 @@ def fetch(self): try: self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose, lang='es') - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -67,7 +67,7 @@ def fetch(self): try: self.results = 
search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose, lang='en') - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -84,7 +84,7 @@ def fetch(self): try: self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose, lang='de') - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -103,7 +103,7 @@ def fetch(self): try: self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose, lang='all') - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -193,7 +193,7 @@ def __call__(self, browser, verbose, timeout = 5.): try: raw = browser.open_novisit(self.urldata, timeout=timeout).read() - except Exception, e: + except Exception as e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: @@ -226,7 +226,7 @@ def __call__(self, browser, verbose, timeout = 5.): try: urldata = self.urldata + '&page=' + str(i) raw = browser.open_novisit(urldata, timeout=timeout).read() - except Exception, e: + except Exception as e: continue if '404 - ' in raw: continue @@ -413,7 +413,7 @@ def fill_MI(self, entry, title, authors, browser, verbose): def get_individual_metadata(self, browser, linkdata, verbose): try: raw = browser.open_novisit(linkdata).read() - except Exception, e: + except Exception as e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: @@ -445,7 +445,7 @@ def populate(self, entries, browser, verbose=False): # self.clean_entry(entry, invalid_id=inv_ids) title = self.get_title(entry) authors = self.get_authors(entry) - except Exception, e: + except Exception as e: if verbose: print 'Failed to get all details for an entry' print e diff --git a/src/calibre/ebooks/metadata/book/base.py 
b/src/calibre/ebooks/metadata/book/base.py index 2bf23e4b82..91dcc29230 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -575,7 +575,10 @@ def format_field_extended(self, key, series_with_index=True): orig_res = res datatype = cmeta['datatype'] if datatype == 'text' and cmeta['is_multiple']: - res = u', '.join(sorted(res, key=sort_key)) + if cmeta['display'].get('is_names', False): + res = u' & '.join(res) + else: + res = u', '.join(sorted(res, key=sort_key)) elif datatype == 'series' and series_with_index: if self.get_extra(key) is not None: res = res + \ diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py index 6ea292aa93..aa2ae8387d 100644 --- a/src/calibre/ebooks/metadata/covers.py +++ b/src/calibre/ebooks/metadata/covers.py @@ -91,7 +91,7 @@ def has_cover(self, mi, ans, timeout=5.): br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout) self.debug('cover for', mi.isbn, 'found') ans.set() - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and e.getcode() == 302: self.debug('cover for', mi.isbn, 'found') ans.set() @@ -106,7 +106,7 @@ def get_covers(self, mi, result_queue, abort, timeout=5.): try: ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read() result_queue.put((True, ans, 'jpg', self.name)) - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and e.getcode() == 404: result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name)) else: @@ -131,7 +131,7 @@ def has_cover(self, mi, ans, timeout=5.): get_cover_url(mi.isbn, br) self.debug('cover for', mi.isbn, 'found') ans.set() - except Exception, e: + except Exception as e: self.debug(e) def get_covers(self, mi, result_queue, abort, timeout=5.): @@ -145,7 +145,7 @@ def get_covers(self, mi, result_queue, abort, timeout=5.): raise ValueError('No cover found for ISBN: %s'%mi.isbn) cover_data = 
br.open_novisit(url).read() result_queue.put((True, cover_data, 'jpg', self.name)) - except Exception, e: + except Exception as e: result_queue.put((False, self.exception_to_string(e), traceback.format_exc(), self.name)) @@ -242,7 +242,7 @@ def get_cover_url(self, isbn, br, timeout=5.): try: url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY src = br.open(url, timeout=timeout).read() - except Exception, err: + except Exception as err: if isinstance(getattr(err, 'args', [None])[0], socket.timeout): err = Exception(_('Douban.com API timed out. Try again later.')) raise err @@ -275,7 +275,7 @@ def has_cover(self, mi, ans, timeout=5.): if self.get_cover_url(mi.isbn, br, timeout=timeout) != None: self.debug('cover for', mi.isbn, 'found') ans.set() - except Exception, e: + except Exception as e: self.debug(e) def get_covers(self, mi, result_queue, abort, timeout=5.): @@ -286,7 +286,7 @@ def get_covers(self, mi, result_queue, abort, timeout=5.): url = self.get_cover_url(mi.isbn, br, timeout=timeout) cover_data = br.open_novisit(url).read() result_queue.put((True, cover_data, 'jpg', self.name)) - except Exception, e: + except Exception as e: result_queue.put((False, self.exception_to_string(e), traceback.format_exc(), self.name)) # }}} diff --git a/src/calibre/ebooks/metadata/douban.py b/src/calibre/ebooks/metadata/douban.py index c6a34b6162..98a51f69d1 100644 --- a/src/calibre/ebooks/metadata/douban.py +++ b/src/calibre/ebooks/metadata/douban.py @@ -49,7 +49,7 @@ def fetch(self): self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose) - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -192,7 +192,7 @@ def populate(self, entries, browser, verbose=False, api_key=''): raw = browser.open(id_url).read() feed = etree.fromstring(raw) x = entry(feed)[0] - except Exception, e: + except Exception as e: if verbose: print 'Failed to get all 
details for an entry' print e @@ -212,7 +212,7 @@ def search(title=None, author=None, publisher=None, isbn=None, api_key = CALIBRE_DOUBAN_API_KEY while start > 0 and len(entries) <= max_results: - new, start = Query(title=title, author=author, publisher=publisher, + new, start = Query(title=title, author=author, publisher=publisher, isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose) if not new: break diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index f19b89eb88..27fa94e217 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -5,7 +5,7 @@ '''Read meta information from epub files''' -import os, re, posixpath, shutil +import os, re, posixpath from cStringIO import StringIO from contextlib import closing @@ -192,6 +192,13 @@ def get_metadata(stream, extract_cover=True): def get_quick_metadata(stream): return get_metadata(stream, False) +def _write_new_cover(new_cdata, cpath): + from calibre.utils.magick.draw import save_cover_data_to + new_cover = PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1]) + new_cover.close() + save_cover_data_to(new_cdata, new_cover.name) + return new_cover + def set_metadata(stream, mi, apply_null=False, update_timestamp=False): stream.seek(0) reader = OCFZipReader(stream, root=os.getcwdu()) @@ -208,6 +215,7 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False): new_cdata = open(mi.cover, 'rb').read() except: pass + new_cover = cpath = None if new_cdata and raster_cover: try: cpath = posixpath.join(posixpath.dirname(reader.opf_path), @@ -215,19 +223,7 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False): cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \ os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg') if cover_replacable: - from calibre.utils.magick.draw import save_cover_data_to, \ - identify - new_cover = 
PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1]) - resize_to = None - if False: # Resize new cover to same size as old cover - shutil.copyfileobj(reader.open(cpath), new_cover) - new_cover.close() - width, height, fmt = identify(new_cover.name) - resize_to = (width, height) - else: - new_cover.close() - save_cover_data_to(new_cdata, new_cover.name, - resize_to=resize_to) + new_cover = _write_new_cover(new_cdata, cpath) replacements[cpath] = open(new_cover.name, 'rb') except: import traceback @@ -249,4 +245,11 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False): newopf = StringIO(reader.opf.render()) safe_replace(stream, reader.container[OPF.MIMETYPE], newopf, extra_replacements=replacements) + try: + if cpath is not None: + replacements[cpath].close() + os.remove(replacements[cpath].name) + except: + pass + diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py index 0401ee78c5..fb01c5dd71 100644 --- a/src/calibre/ebooks/metadata/fetch.py +++ b/src/calibre/ebooks/metadata/fetch.py @@ -93,7 +93,7 @@ def _fetch(self): traceback.print_exc() mi.comments = None - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -186,7 +186,7 @@ def fetch(self): self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose) - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -217,7 +217,7 @@ def fetch(self): try: opts, args = option_parser().parse_args(args) self.results = create_books(opts, args) - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -244,7 +244,7 @@ def fetch(self): try: self.results = get_social_metadata(self.title, self.book_author, self.publisher, self.isbn) - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -285,7 +285,7 @@ def fetch(self): from 
calibre.ebooks.metadata.kdl import get_series try: self.results = get_series(self.title, self.book_author) - except Exception, e: + except Exception as e: import traceback traceback.print_exc() self.exception = e diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py index b780f2b39d..145e39768d 100644 --- a/src/calibre/ebooks/metadata/fictionwise.py +++ b/src/calibre/ebooks/metadata/fictionwise.py @@ -30,7 +30,7 @@ def fetch(self): try: self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose) - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -91,7 +91,7 @@ def __call__(self, browser, verbose, timeout = 5.): try: raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read() - except Exception, e: + except Exception as e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: @@ -276,7 +276,7 @@ def fill_MI(self, entry, title, authors, ratings, verbose): def get_individual_metadata(self, browser, linkdata, verbose): try: raw = browser.open_novisit(self.BASE_URL + linkdata).read() - except Exception, e: + except Exception as e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: @@ -311,7 +311,7 @@ def populate(self, entries, browser, verbose=False): #maybe strenghten the search ratings = self.get_rating(entry.xpath("./p/table")[1], verbose) authors = self.get_authors(entry) - except Exception, e: + except Exception as e: if verbose: print _('Failed to get all details for an entry') print e @@ -328,7 +328,7 @@ def populate(self, entries, browser, verbose=False): #maybe strenghten the search ratings = self.get_rating(entry.xpath("./p/table")[1], verbose) authors = self.get_authors(entry) - except Exception, e: + except Exception as e: if verbose: print _('Failed to get all details for an entry') print e diff --git 
a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py index 2087b7c489..5a5e09234e 100644 --- a/src/calibre/ebooks/metadata/google_books.py +++ b/src/calibre/ebooks/metadata/google_books.py @@ -176,7 +176,7 @@ def populate(self, entries, browser, verbose=False): raw = browser.open(id_url).read() feed = etree.fromstring(raw) x = entry(feed)[0] - except Exception, e: + except Exception as e: if verbose: print 'Failed to get all details for an entry' print e diff --git a/src/calibre/ebooks/metadata/imp.py b/src/calibre/ebooks/metadata/imp.py index e2a2b61f31..28bc2bc00f 100644 --- a/src/calibre/ebooks/metadata/imp.py +++ b/src/calibre/ebooks/metadata/imp.py @@ -38,7 +38,7 @@ def cString(skip=0): mi.author = author if category: mi.category = category - except Exception, err: + except Exception as err: msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err)) print >>sys.stderr, msg.encode('utf8') return mi diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py index 1c5f706593..54cd403c62 100644 --- a/src/calibre/ebooks/metadata/isbndb.py +++ b/src/calibre/ebooks/metadata/isbndb.py @@ -25,7 +25,7 @@ def fetch_metadata(url, max=3, timeout=5.): while len(books) < total_results and max > 0: try: raw = br.open(url, timeout=timeout).read() - except Exception, err: + except Exception as err: raise ISBNDBError('Could not fetch ISBNDB metadata. 
Error: '+str(err)) soup = BeautifulStoneSoup(raw, convertEntities=BeautifulStoneSoup.XML_ENTITIES) diff --git a/src/calibre/ebooks/metadata/kdl.py b/src/calibre/ebooks/metadata/kdl.py index b0b961b603..aa2f0d7246 100644 --- a/src/calibre/ebooks/metadata/kdl.py +++ b/src/calibre/ebooks/metadata/kdl.py @@ -43,7 +43,7 @@ def get_series(title, authors, timeout=60): br = browser() try: raw = br.open_novisit(url, timeout=timeout).read() - except URLError, e: + except URLError as e: if isinstance(e.reason, socket.timeout): raise Exception('KDL Server busy, try again later') raise diff --git a/src/calibre/ebooks/metadata/library_thing.py b/src/calibre/ebooks/metadata/library_thing.py index a0f28a3c21..be0cd5f324 100644 --- a/src/calibre/ebooks/metadata/library_thing.py +++ b/src/calibre/ebooks/metadata/library_thing.py @@ -45,7 +45,7 @@ def check_for_cover(isbn, timeout=5.): try: br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout) return True - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and e.getcode() == 302: return True return False diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py index 8914e2d985..2afa6c018a 100644 --- a/src/calibre/ebooks/metadata/nicebooks.py +++ b/src/calibre/ebooks/metadata/nicebooks.py @@ -32,7 +32,7 @@ def fetch(self): try: self.results = search(self.title, self.book_author, self.publisher, self.isbn, max_results=10, verbose=self.verbose) - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() @@ -54,7 +54,7 @@ def has_cover(self, mi, ans, timeout=5.): if Covers(mi.isbn)(entry).check_cover(): self.debug('cover for', mi.isbn, 'found') ans.set() - except Exception, e: + except Exception as e: self.debug(e) def get_covers(self, mi, result_queue, abort, timeout=5.): @@ -67,7 +67,7 @@ def get_covers(self, mi, result_queue, abort, timeout=5.): if not ext: ext = 'jpg' result_queue.put((True, cover_data, ext, 
self.name)) - except Exception, e: + except Exception as e: result_queue.put((False, self.exception_to_string(e), traceback.format_exc(), self.name)) @@ -109,7 +109,7 @@ def __call__(self, browser, verbose, timeout = 5.): try: raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read() - except Exception, e: + except Exception as e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: @@ -144,7 +144,7 @@ def __call__(self, browser, verbose, timeout = 5.): try: urldata = self.urldata + '&p=' + str(i) raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read() - except Exception, e: + except Exception as e: continue if '<title>404 - ' in raw: continue @@ -233,7 +233,7 @@ def fill_MI(self, entry, title, authors, verbose): def get_individual_metadata(self, browser, linkdata, verbose): try: raw = browser.open_novisit(self.BASE_URL + linkdata).read() - except Exception, e: + except Exception as e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: @@ -266,7 +266,7 @@ def populate(self, entries, browser, verbose=False): entry = entry.find("div[@id='book-info']") title = self.get_title(entry) authors = self.get_authors(entry) - except Exception, e: + except Exception as e: if verbose: print 'Failed to get all details for an entry' print e @@ -280,7 +280,7 @@ def populate(self, entries, browser, verbose=False): entry = entry.find("div[@id='book-info']") title = self.get_title(entry) authors = self.get_authors(entry) - except Exception, e: + except Exception as e: if verbose: print 'Failed to get all details for an entry' print e @@ -315,7 +315,7 @@ def get_cover(self, browser, timeout = 5.): cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \ self.urlimg.rpartition('.')[-1] return cover, ext if ext else 'jpg' - except Exception, err: + except Exception as err: if isinstance(getattr(err, 'args', [None])[0], socket.timeout): raise NiceBooksError(_('Nicebooks 
timed out. Try again later.')) if not len(self.urlimg): diff --git a/src/calibre/ebooks/metadata/rb.py b/src/calibre/ebooks/metadata/rb.py index 1f13ce1d9d..c8ab657146 100644 --- a/src/calibre/ebooks/metadata/rb.py +++ b/src/calibre/ebooks/metadata/rb.py @@ -43,7 +43,7 @@ def get_metadata(stream): elif key.strip() == 'AUTHOR': mi.author = value mi.authors = string_to_authors(value) - except Exception, err: + except Exception as err: msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err)) print >>sys.stderr, msg.encode('utf8') raise diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 9460ed7ace..c9c7350a74 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -41,12 +41,12 @@ def run(self): try: self.get_details() except: - self.log.error('get_details failed for url: %r'%self.url) + self.log.exception('get_details failed for url: %r'%self.url) def get_details(self): try: raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip() - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: self.log.error('URL malformed: %r'%self.url) @@ -168,7 +168,7 @@ def parse_details(self, root): if self.isbn: self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id) if self.cover_url: - self.cache_identifier_to_cover_url(self.amazon_id, + self.plugin.cache_identifier_to_cover_url(self.amazon_id, self.cover_url) self.result_queue.put(mi) @@ -359,7 +359,7 @@ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ br = self.browser try: raw = br.open_novisit(query, timeout=timeout).read().strip() - except Exception, e: + except Exception as e: if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: log.error('Query malformed: %r'%query) diff --git a/src/calibre/ebooks/metadata/sources/base.py 
b/src/calibre/ebooks/metadata/sources/base.py index 6fc52eb88b..0d820c2bae 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -21,6 +21,7 @@ def create_log(ostream=None): log.outputs = [FileStream(ostream)] return log +# Comparing Metadata objects for relevance {{{ words = ("the", "a", "an", "of", "and") prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words))) trailing_paren_pat = re.compile(r'\(.*\)$') @@ -35,6 +36,55 @@ def cleanup_title(s): s = whitespace_pat.sub(' ', s) return s.strip() +class InternalMetadataCompareKeyGen(object): + + ''' + Generate a sort key for comparison of the relevance of Metadata objects, + given a search query. + + The sort key ensures that an ascending order sort is a sort by order of + decreasing relevance. + + The algorithm is: + + 1. Prefer results that have the same ISBN as specified in the query + 2. Prefer results with all available fields filled in + 3. Prefer results that are an exact title match to the query + 4. Prefer results with longer comments (greater than 10 % longer) + 5. Prefer results with a cached cover URL + 6. 
Use the relevance of the result as reported by the metadata source's search + engine + ''' + + def __init__(self, mi, source_plugin, title, authors, identifiers): + isbn = 1 if mi.isbn and mi.isbn == identifiers.get('isbn', None) else 2 + + all_fields = 1 if source_plugin.test_fields(mi) is None else 2 + + exact_title = 1 if title and \ + cleanup_title(title) == cleanup_title(mi.title) else 2 + + has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\ + is None else 1 + + self.base = (isbn, all_fields, exact_title) + self.comments_len = len(mi.comments.strip() if mi.comments else '') + self.extra = (has_cover, getattr(mi, 'source_relevance', 0)) + + def __cmp__(self, other): + result = cmp(self.base, other.base) + if result == 0: + # Now prefer results with the longer comments, within 10% + cx, cy = self.comments_len, other.comments_len + t = (cx + cy) / 20 + delta = cy - cx + if abs(delta) > t: + result = delta + else: + result = cmp(self.extra, other.extra) + return result + +# }}} class Source(Plugin): @@ -70,7 +120,7 @@ def prefs(self): def browser(self): if self._browser is None: self._browser = browser(user_agent=random_user_agent()) - return self._browser + return self._browser.clone_browser() # }}} @@ -172,69 +222,30 @@ def test_fields(self, mi): def get_cached_cover_url(self, identifiers): ''' Return cached cover URL for the book identified by - the identifiers dict or Noneif no such URL exists + the identifiers dict or None if no such URL exists. + + Note that this method must only return validated URLs, i.e. not URLS + that could result in a generic cover image or a not found error. ''' return None - def compare_identify_results(self, x, y, title=None, authors=None, + def identify_results_keygen(self, title=None, authors=None, identifiers={}): ''' - Method used to sort the results from a call to identify by relevance. - Uses the actual query and various heuristics to rank results. 
- Re-implement in your plugin if this generic algorithm is not suitable. - Note that this method assumes x and y have a source_relevance - attribute. + Return a function that is used to generate a key that can sort Metadata + objects by their relevance given a search query (title, authors, + identifiers). - one < two iff one is more relevant than two + These keys are used to sort the results of a call to :meth:`identify`. + + For details on the default algorithm see + :class:`InternalMetadataCompareKeyGen`. Re-implement this function in + your plugin if the default algorithm is not suitable. ''' - # First, guarantee that if the query specifies an ISBN, the result with - # the same isbn is the most relevant - def isbn_test(mi): - return mi.isbn and mi.isbn == identifiers.get('isbn', None) - - def boolcmp(a, b): - return -1 if a and not b else 1 if not a and b else 0 - - x_has_isbn, y_has_isbn = isbn_test(x), isbn_test(y) - result = boolcmp(x_has_isbn, y_has_isbn) - if result != 0: - return result - - # Now prefer results that have complete metadata over those that don't - x_has_all_fields = self.test_fields(x) is None - y_has_all_fields = self.test_fields(y) is None - - result = boolcmp(x_has_all_fields, y_has_all_fields) - if result != 0: - return result - - # Now prefer results whose title matches the search query - if title: - x_title = cleanup_title(x.title) - y_title = cleanup_title(y.title) - t = cleanup_title(title) - x_has_title, y_has_title = x_title == t, y_title == t - result = boolcmp(x_has_title, y_has_title) - if result != 0: - return result - - # Now prefer results with the longer comments, within 10% - cx = len(x.comments.strip() if x.comments else '') - cy = len(y.comments.strip() if y.comments else '') - t = (cx + cy) / 20 - result = cy - cx - if result != 0 and abs(cx - cy) > t: - return result - - # Now prefer results with cached cover URLs - x_has_cover = self.get_cached_cover_url(x.identifiers) is not None - y_has_cover = 
self.get_cached_cover_url(y.identifiers) is not None - result = boolcmp(x_has_cover, y_has_cover) - if result != 0: - return result - - # Now use the relevance reported by the remote search engine - return x.source_relevance - y.source_relevance + def keygen(mi): + return InternalMetadataCompareKeyGen(mi, self, title, authors, + identifiers) + return keygen def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=5): diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index b7298c0099..06362cf8b8 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -213,7 +213,7 @@ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ br = self.browser try: raw = br.open_novisit(query, timeout=timeout).read() - except Exception, e: + except Exception as e: log.exception('Failed to make identify query: %r'%query) return as_unicode(e) @@ -222,7 +222,7 @@ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0], parser=parser) entries = entry(feed) - except Exception, e: + except Exception as e: log.exception('Failed to parse identify results') return as_unicode(e) diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py index 032041ef29..3419a91d31 100644 --- a/src/calibre/ebooks/metadata/sources/test.py +++ b/src/calibre/ebooks/metadata/sources/test.py @@ -11,7 +11,6 @@ from Queue import Queue, Empty from threading import Event - from calibre.customize.ui import metadata_plugins from calibre import prints from calibre.ebooks.metadata import check_isbn @@ -90,11 +89,17 @@ def test_identify_plugin(name, tests): except Empty: break - prints('Found', len(results), 'matches:') + prints('Found', len(results), 'matches:', end=' ') + prints('Smaller relevance 
means better match') - for mi in results: + results.sort(key=plugin.identify_results_keygen( + title=kwargs.get('title', None), authors=kwargs.get('authors', + None), identifiers=kwargs.get('identifiers', {}))) + + for i, mi in enumerate(results): + prints('*'*30, 'Relevance:', i, '*'*30) prints(mi) - prints('\n\n') + prints('*'*75, '\n\n') possibles = [] for mi in results: @@ -117,6 +122,9 @@ def test_identify_plugin(name, tests): prints('Failed to find', plugin.test_fields(possibles[0])) raise SystemExit(1) + if results[0] is not possibles[0]: + prints('Most relevant result failed the tests') + prints('Average time per query', sum(times)/len(times)) diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index 10d45186de..17f99150be 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -147,7 +147,7 @@ def read_from_opf(self, opfreader): if path and os.access(path, os.R_OK): try: self.read_ncx_toc(path) - except Exception, err: + except Exception as err: print 'WARNING: Invalid NCX file:', err return cwd = os.path.abspath(self.base_path) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index f1b1b1ef63..8877ecdd0b 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -769,7 +769,8 @@ def text_section(self, index): def extract_text(self): self.log.debug('Extracting text...') - text_sections = [self.text_section(i) for i in range(1, self.book_header.records + 1)] + text_sections = [self.text_section(i) for i in range(1, + min(self.book_header.records + 1, len(self.sections)))] processed_records = list(range(0, self.book_header.records + 1)) self.mobi_html = '' diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 3bd936b803..e5f2cace7f 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -884,13 +884,13 @@ def _parse_xhtml(self, data): def first_pass(data): try: data = 
etree.fromstring(data, parser=parser) - except etree.XMLSyntaxError, err: + except etree.XMLSyntaxError as err: self.oeb.log.exception('Initial parse failed:') repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0)) data = ENTITY_RE.sub(repl, data) try: data = etree.fromstring(data, parser=parser) - except etree.XMLSyntaxError, err: + except etree.XMLSyntaxError as err: self.oeb.logger.warn('Parsing file %r as HTML' % self.href) if err.args and err.args[0].startswith('Excessive depth'): from lxml.html import soupparser diff --git a/src/calibre/ebooks/pdf/manipulate/decrypt.py b/src/calibre/ebooks/pdf/manipulate/decrypt.py index ede12f15ee..fd8510efc7 100644 --- a/src/calibre/ebooks/pdf/manipulate/decrypt.py +++ b/src/calibre/ebooks/pdf/manipulate/decrypt.py @@ -103,7 +103,7 @@ def main(args=sys.argv, name=''): try: decrypt(args[0], opts.output, args[1]) - except DecryptionError, e: + except DecryptionError as e: print e.value return 1 diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py index 564ba14a32..4ac1d0e368 100644 --- a/src/calibre/ebooks/pdf/pdftohtml.py +++ b/src/calibre/ebooks/pdf/pdftohtml.py @@ -50,7 +50,7 @@ def pdftohtml(output_dir, pdf_path, no_images): try: p = popen(cmd, stderr=logf._fd, stdout=logf._fd, stdin=subprocess.PIPE) - except OSError, err: + except OSError as err: if err.errno == 2: raise ConversionError(_('Could not find pdftohtml, check it is in your PATH')) else: @@ -60,7 +60,7 @@ def pdftohtml(output_dir, pdf_path, no_images): try: ret = p.wait() break - except OSError, e: + except OSError as e: if e.errno == errno.EINTR: continue else: diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 1594b2fbce..23c16f473d 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -268,7 +268,7 @@ def convert(self, stream, options, file_ext, log, self.log('Converting RTF to XML...') try: xml = self.generate_xml(stream.name) - except 
RtfInvalidCodeException, e: + except RtfInvalidCodeException as e: raise ValueError(_('This RTF file has a feature calibre does not ' 'support. Convert it to HTML first and then try it.\n%s')%e) diff --git a/src/calibre/ebooks/snb/snbfile.py b/src/calibre/ebooks/snb/snbfile.py index 9a7d65e417..1a0986baf4 100644 --- a/src/calibre/ebooks/snb/snbfile.py +++ b/src/calibre/ebooks/snb/snbfile.py @@ -85,7 +85,7 @@ def Parse(self, snbFile, metaOnly = False): uncompressedData += bzdc.decompress(data) else: uncompressedData += data - except Exception, e: + except Exception as e: print e if len(uncompressedData) != self.plainStreamSizeUncompressed: raise Exception() diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py index c3c82ef893..b01a7bcdb7 100755 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- + """ PyTextile @@ -206,6 +208,12 @@ class Textile(object): (re.compile(r'{clubs?}'), r'♣'), # club (re.compile(r'{hearts?}'), r'♥'), # heart (re.compile(r'{diam(onds?|s)}'), r'♦'), # diamond + (re.compile(r'{"}'), r'"'), # double-quote + (re.compile(r"{'}"), r'''), # single-quote + (re.compile(r"{(’|'/|/')}"), r'’'), # closing-single-quote - apostrophe + (re.compile(r"{(‘|\\'|'\\)}"), r'‘'), # opening-single-quote + (re.compile(r'{(”|"/|/")}'), r'”'), # closing-double-quote + (re.compile(r'{(“|\\"|"\\)}'), r'“'), # opening-double-quote ] glyph_defaults = [ (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 7face4c24f..cad55b8c3f 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -165,7 +165,7 @@ def convert(self, stream, options, file_ext, log, elif options.formatting_type == 'textile': log.debug('Running text through textile conversion...') html = convert_textile(txt) - setattr(options, 
'smarten_punctuation', True) + #setattr(options, 'smarten_punctuation', True) else: log.debug('Running text through basic conversion...') flow_size = getattr(options, 'flow_size', 0) diff --git a/src/calibre/gui2/actions/copy_to_library.py b/src/calibre/gui2/actions/copy_to_library.py index 0668baeac6..2e4d0380be 100644 --- a/src/calibre/gui2/actions/copy_to_library.py +++ b/src/calibre/gui2/actions/copy_to_library.py @@ -32,7 +32,7 @@ def __init__(self, ids, db, loc, progress, done, delete_after): def run(self): try: self.doit() - except Exception, err: + except Exception as err: import traceback try: err = unicode(err) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index f40cf0ff75..44b5bb446b 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -78,7 +78,7 @@ def run(self): if isinstance(root, unicode): root = root.encode(filesystem_encoding) self.walk(root) - except Exception, err: + except Exception as err: import traceback traceback.print_exc() try: diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index 925fecd693..bdcf9ede05 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -6,7 +6,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import textwrap, codecs +import textwrap, codecs, importlib from functools import partial from PyQt4.Qt import QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, \ @@ -22,8 +22,8 @@ def config_widget_for_input_plugin(plugin): name = plugin.name.lower().replace(' ', '_') try: - return __import__('calibre.gui2.convert.'+name, - fromlist=[1]).PluginWidget + return importlib.import_module( + 'calibre.gui2.convert.'+name).PluginWidget except ImportError: pass diff --git a/src/calibre/gui2/convert/bulk.py b/src/calibre/gui2/convert/bulk.py index 349f39ac76..576b3ca3e7 100644 --- a/src/calibre/gui2/convert/bulk.py +++ b/src/calibre/gui2/convert/bulk.py @@ -4,7 
+4,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>' __docformat__ = 'restructuredtext en' -import shutil +import shutil, importlib from PyQt4.Qt import QString, SIGNAL @@ -82,8 +82,8 @@ def widget_factory(cls): output_widget = None name = self.plumber.output_plugin.name.lower().replace(' ', '_') try: - output_widget = __import__('calibre.gui2.convert.'+name, - fromlist=[1]) + output_widget = importlib.import_module( + 'calibre.gui2.convert.'+name) pw = output_widget.PluginWidget pw.ICON = I('back.png') pw.HELP = _('Options specific to the output format.') diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py index 95dd7623c9..80311502e8 100644 --- a/src/calibre/gui2/convert/metadata.py +++ b/src/calibre/gui2/convert/metadata.py @@ -192,7 +192,7 @@ def select_cover(self): try: cf = open(_file, "rb") cover = cf.read() - except IOError, e: + except IOError as e: d = error_dialog(self.parent(), _('Error reading file'), _("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e)) d.exec_() diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index c2241ff8eb..407e7922e7 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -69,7 +69,7 @@ def pre_commit_check(self): try: pat = unicode(x.regex) re.compile(pat) - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid regular expression'), _('Invalid regular expression: %s')%err, show=True) return False diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py index 59fcbb65ad..3575fb5ffb 100644 --- a/src/calibre/gui2/convert/single.py +++ b/src/calibre/gui2/convert/single.py @@ -6,7 +6,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import sys, cPickle, shutil +import sys, cPickle, shutil, importlib from PyQt4.Qt import 
QString, SIGNAL, QAbstractListModel, Qt, QVariant, QFont @@ -182,8 +182,8 @@ def widget_factory(cls): output_widget = None name = self.plumber.output_plugin.name.lower().replace(' ', '_') try: - output_widget = __import__('calibre.gui2.convert.'+name, - fromlist=[1]) + output_widget = importlib.import_module( + 'calibre.gui2.convert.'+name) pw = output_widget.PluginWidget pw.ICON = I('back.png') pw.HELP = _('Options specific to the output format.') @@ -193,8 +193,8 @@ def widget_factory(cls): input_widget = None name = self.plumber.input_plugin.name.lower().replace(' ', '_') try: - input_widget = __import__('calibre.gui2.convert.'+name, - fromlist=[1]) + input_widget = importlib.import_module( + 'calibre.gui2.convert.'+name) pw = input_widget.PluginWidget pw.ICON = I('forward.png') pw.HELP = _('Options specific to the input format.') diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py index beaca77a38..10602fb28c 100644 --- a/src/calibre/gui2/custom_column_widgets.py +++ b/src/calibre/gui2/custom_column_widgets.py @@ -226,10 +226,18 @@ def getter(self): class Text(Base): def setup_ui(self, parent): + if self.col_metadata['display'].get('is_names', False): + self.sep = u' & ' + else: + self.sep = u', ' values = self.all_values = list(self.db.all_custom(num=self.col_id)) values.sort(key=sort_key) if self.col_metadata['is_multiple']: w = MultiCompleteLineEdit(parent) + w.set_separator(self.sep.strip()) + if self.sep == u' & ': + w.set_space_before_sep(True) + w.set_add_separator(tweaks['authors_completer_append_separator']) w.update_items_cache(values) w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred) else: @@ -261,12 +269,12 @@ def setter(self, val): if self.col_metadata['is_multiple']: if not val: val = [] - self.widgets[1].setText(u', '.join(val)) + self.widgets[1].setText(self.sep.join(val)) def getter(self): if self.col_metadata['is_multiple']: val = unicode(self.widgets[1].text()).strip() - ans = 
[x.strip() for x in val.split(',') if x.strip()] + ans = [x.strip() for x in val.split(self.sep.strip()) if x.strip()] if not ans: ans = None return ans @@ -847,13 +855,20 @@ def setup_ui(self, parent): self.main_widget.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred) self.adding_widget = self.main_widget - w = RemoveTags(parent, values) - self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' + - _('tags to remove'), parent)) - self.widgets.append(w) - self.removing_widget = w - w.tags_box.textChanged.connect(self.a_c_checkbox_changed) - w.checkbox.stateChanged.connect(self.a_c_checkbox_changed) + if not self.col_metadata['display'].get('is_names', False): + w = RemoveTags(parent, values) + self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' + + _('tags to remove'), parent)) + self.widgets.append(w) + self.removing_widget = w + self.main_widget.set_separator(',') + w.tags_box.textChanged.connect(self.a_c_checkbox_changed) + w.checkbox.stateChanged.connect(self.a_c_checkbox_changed) + else: + self.main_widget.set_separator('&') + self.main_widget.set_space_before_sep(True) + self.main_widget.set_add_separator( + tweaks['authors_completer_append_separator']) else: self.make_widgets(parent, MultiCompleteComboBox) self.main_widget.set_separator(None) @@ -882,21 +897,26 @@ def commit(self, book_ids, notify=False): if not self.a_c_checkbox.isChecked(): return if self.col_metadata['is_multiple']: - remove_all, adding, rtext = self.gui_val - remove = set() - if remove_all: - remove = set(self.db.all_custom(num=self.col_id)) + if self.col_metadata['display'].get('is_names', False): + val = self.gui_val + add = [v.strip() for v in val.split('&') if v.strip()] + self.db.set_custom_bulk(book_ids, add, num=self.col_id) else: - txt = rtext + remove_all, adding, rtext = self.gui_val + remove = set() + if remove_all: + remove = set(self.db.all_custom(num=self.col_id)) + else: + txt = rtext + if txt: + remove = set([v.strip() for v in txt.split(',')]) + 
txt = adding if txt: - remove = set([v.strip() for v in txt.split(',')]) - txt = adding - if txt: - add = set([v.strip() for v in txt.split(',')]) - else: - add = set() - self.db.set_custom_bulk_multiple(book_ids, add=add, remove=remove, - num=self.col_id) + add = set([v.strip() for v in txt.split(',')]) + else: + add = set() + self.db.set_custom_bulk_multiple(book_ids, add=add, + remove=remove, num=self.col_id) else: val = self.gui_val val = self.normalize_ui_val(val) @@ -905,10 +925,11 @@ def commit(self, book_ids, notify=False): def getter(self): if self.col_metadata['is_multiple']: - return self.removing_widget.checkbox.isChecked(), \ - unicode(self.adding_widget.text()), \ - unicode(self.removing_widget.tags_box.text()) - + if not self.col_metadata['display'].get('is_names', False): + return self.removing_widget.checkbox.isChecked(), \ + unicode(self.adding_widget.text()), \ + unicode(self.removing_widget.tags_box.text()) + return unicode(self.adding_widget.text()) val = unicode(self.main_widget.currentText()).strip() if not val: val = None diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 215e67c46f..ab2177cef1 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -64,7 +64,7 @@ def run(self): self.result = self.func(*self.args, **self.kwargs) if self._aborted: return - except (Exception, SystemExit), err: + except (Exception, SystemExit) as err: if self._aborted: return self.failed = True @@ -162,7 +162,7 @@ def do_connect(self, connected_devices, device_kind): dev.reset(detected_device=detected_device, report_progress=self.report_progress) dev.open(self.current_library_uuid) - except OpenFeedback, e: + except OpenFeedback as e: if dev not in self.ejected_devices: self.open_feedback_msg(dev.get_gui_name(), e.feedback_msg) self.ejected_devices.add(dev) diff --git a/src/calibre/gui2/device_drivers/configwidget.py b/src/calibre/gui2/device_drivers/configwidget.py index 97c492b550..fc7e16e639 100644 --- 
a/src/calibre/gui2/device_drivers/configwidget.py +++ b/src/calibre/gui2/device_drivers/configwidget.py @@ -133,7 +133,7 @@ def validate(self): try: validation_formatter.validate(tmpl) return True - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid template'), '<p>'+_('The template %s is invalid:')%tmpl + \ '<br>'+unicode(err), show=True) diff --git a/src/calibre/gui2/dialogs/catalog.py b/src/calibre/gui2/dialogs/catalog.py index ebca7235eb..a8f7ed160f 100644 --- a/src/calibre/gui2/dialogs/catalog.py +++ b/src/calibre/gui2/dialogs/catalog.py @@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import os, sys +import os, sys, importlib from calibre.customize.ui import config from calibre.gui2.dialogs.catalog_ui import Ui_Dialog @@ -43,8 +43,7 @@ def __init__(self, parent, dbspec, ids, db): name = plugin.name.lower().replace(' ', '_') if type(plugin) in builtin_plugins: try: - catalog_widget = __import__('calibre.gui2.catalog.'+name, - fromlist=[1]) + catalog_widget = importlib.import_module('calibre.gui2.catalog.'+name) pw = catalog_widget.PluginWidget() pw.initialize(name, db) pw.ICON = I('forward.png') @@ -75,7 +74,7 @@ def __init__(self, parent, dbspec, ids, db): # Import the dynamic PluginWidget() from .py file provided in plugin.zip try: sys.path.insert(0, plugin.resources_path) - catalog_widget = __import__(name, fromlist=[1]) + catalog_widget = importlib.import_module(name) pw = catalog_widget.PluginWidget() pw.initialize(name) pw.ICON = I('forward.png') diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index 560090d2b3..95f99d4034 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -68,7 +68,7 @@ def do_one_dump(self): self.start_load() return QTimer.singleShot(0, self.do_one_dump) - except Exception, e: + except Exception as e: import traceback self.error = 
(as_unicode(e), traceback.format_exc()) self.reject() @@ -90,7 +90,7 @@ def start_load(self): self.conn.commit() QTimer.singleShot(0, self.do_one_load) - except Exception, e: + except Exception as e: import traceback self.error = (as_unicode(e), traceback.format_exc()) self.reject() @@ -111,7 +111,7 @@ def do_one_load(self): self.pb.setValue(self.pb.value() + 1) self.count -= 1 QTimer.singleShot(0, self.do_one_load) - except Exception, e: + except Exception as e: import traceback self.error = (as_unicode(e), traceback.format_exc()) self.reject() diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 9b25545252..0683f2cb91 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -120,7 +120,7 @@ def do_one_safe(self): self.msg.setText(self.msg_text.format(self.phases[self.current_phase], percent)) self.do_one(id) - except Exception, err: + except Exception as err: import traceback try: err = unicode(err) @@ -653,7 +653,10 @@ def s_r_do_destination(self, mi, val): if self.destination_field_fm['is_multiple']: if self.comma_separated.isChecked(): - if dest == 'authors': + if dest == 'authors' or \ + (self.destination_field_fm['is_custom'] and + self.destination_field_fm['datatype'] == 'text' and + self.destination_field_fm['display'].get('is_names', False)): splitter = ' & ' else: splitter = ',' diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 9efe7f7160..f6b7b94453 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -76,7 +76,7 @@ def run(self): self.cover_data, self.errors = download_cover(mi, timeout=self.timeout) - except Exception, e: + except Exception as e: self.exception = e self.traceback = traceback.format_exc() print self.traceback @@ -183,7 +183,7 @@ def select_cover(self, checked): try: cf = open(_file, "rb") cover = cf.read() - except 
IOError, e: + except IOError as e: d = error_dialog(self, _('Error reading file'), _("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e)) d.exec_() diff --git a/src/calibre/gui2/dialogs/tag_editor.py b/src/calibre/gui2/dialogs/tag_editor.py index 6bd8eb7dbe..bf3bb9fd4e 100644 --- a/src/calibre/gui2/dialogs/tag_editor.py +++ b/src/calibre/gui2/dialogs/tag_editor.py @@ -122,6 +122,8 @@ def add_tag(self): tags = unicode(self.add_tag_input.text()).split(',') for tag in tags: tag = tag.strip() + if not tag: + continue for item in self.available_tags.findItems(tag, Qt.MatchFixedString): self.available_tags.takeItem(self.available_tags.row(item)) if tag not in self.tags: diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py index 5453a90766..d66d02d211 100644 --- a/src/calibre/gui2/dialogs/user_profiles.py +++ b/src/calibre/gui2/dialogs/user_profiles.py @@ -237,7 +237,7 @@ def add_profile(self, clicked): try: compile_recipe(src) - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid input'), _('<p>Could not create recipe. Error:<br>%s')%str(err)).exec_() return @@ -246,7 +246,7 @@ def add_profile(self, clicked): src = unicode(self.source_code.toPlainText()) try: title = compile_recipe(src).title - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid input'), _('<p>Could not create recipe. Error:<br>%s')%str(err)).exec_() return @@ -333,7 +333,7 @@ def load(self): try: profile = open(file, 'rb').read().decode('utf-8') title = compile_recipe(profile).title - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid input'), _('<p>Could not create recipe. 
Error:<br>%s')%str(err)).exec_() return diff --git a/src/calibre/gui2/dnd.py b/src/calibre/gui2/dnd.py index 928de72578..1f9dbdfa34 100644 --- a/src/calibre/gui2/dnd.py +++ b/src/calibre/gui2/dnd.py @@ -35,7 +35,7 @@ def run(self): try: br = browser() br.retrieve(self.url, self.fpath, self.callback) - except Exception, e: + except Exception as e: self.err = as_unicode(e) import traceback self.tb = traceback.format_exc() diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py index c84b3180f7..81c1d9c255 100644 --- a/src/calibre/gui2/email.py +++ b/src/calibre/gui2/email.py @@ -116,7 +116,7 @@ def run(self): try: self.sendmail(job) break - except Exception, e: + except Exception as e: if not self._run: return import traceback diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py index 3a090f8102..0f74500099 100644 --- a/src/calibre/gui2/library/delegates.py +++ b/src/calibre/gui2/library/delegates.py @@ -398,7 +398,7 @@ def setModelData(self, editor, model, index): val = unicode(editor.textbox.toPlainText()) try: validation_formatter.validate(val) - except Exception, err: + except Exception as err: error_dialog(self.parent(), _('Invalid template'), '<p>'+_('The template %s is invalid:')%val + \ '<br>'+str(err), show=True) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index a200562ea9..c921ea125f 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -640,18 +640,18 @@ def ondevice_decorator(r, idx=-1): return self.bool_yes_icon return self.bool_blank_icon - def text_type(r, mult=False, idx=-1): + def text_type(r, mult=None, idx=-1): text = self.db.data[r][idx] - if text and mult: - return QVariant(', '.join(sorted(text.split('|'),key=sort_key))) + if text and mult is not None: + if mult: + return QVariant(u' & '.join(text.split('|'))) + return QVariant(u', '.join(sorted(text.split('|'),key=sort_key))) return QVariant(text) - def 
decorated_text_type(r, mult=False, idx=-1): + def decorated_text_type(r, idx=-1): text = self.db.data[r][idx] if force_to_bool(text) is not None: return None - if text and mult: - return QVariant(', '.join(sorted(text.split('|'),key=sort_key))) return QVariant(text) def number_type(r, idx=-1): @@ -659,7 +659,7 @@ def number_type(r, idx=-1): self.dc = { 'title' : functools.partial(text_type, - idx=self.db.field_metadata['title']['rec_index'], mult=False), + idx=self.db.field_metadata['title']['rec_index'], mult=None), 'authors' : functools.partial(authors, idx=self.db.field_metadata['authors']['rec_index']), 'size' : functools.partial(size, @@ -671,14 +671,14 @@ def number_type(r, idx=-1): 'rating' : functools.partial(rating_type, idx=self.db.field_metadata['rating']['rec_index']), 'publisher': functools.partial(text_type, - idx=self.db.field_metadata['publisher']['rec_index'], mult=False), + idx=self.db.field_metadata['publisher']['rec_index'], mult=None), 'tags' : functools.partial(tags, idx=self.db.field_metadata['tags']['rec_index']), 'series' : functools.partial(series_type, idx=self.db.field_metadata['series']['rec_index'], siix=self.db.field_metadata['series_index']['rec_index']), 'ondevice' : functools.partial(text_type, - idx=self.db.field_metadata['ondevice']['rec_index'], mult=False), + idx=self.db.field_metadata['ondevice']['rec_index'], mult=None), } self.dc_decorator = { @@ -692,11 +692,12 @@ def number_type(r, idx=-1): datatype = self.custom_columns[col]['datatype'] if datatype in ('text', 'comments', 'composite', 'enumeration'): mult=self.custom_columns[col]['is_multiple'] + if mult is not None: + mult = self.custom_columns[col]['display'].get('is_names', False) self.dc[col] = functools.partial(text_type, idx=idx, mult=mult) if datatype in ['text', 'composite', 'enumeration'] and not mult: if self.custom_columns[col]['display'].get('use_decorations', False): - self.dc[col] = functools.partial(decorated_text_type, - idx=idx, mult=mult) + self.dc[col] 
= functools.partial(decorated_text_type, idx=idx) self.dc_decorator[col] = functools.partial( bool_type_decorator, idx=idx, bool_cols_are_tristate= diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index c62936a46f..0cce33da9e 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -78,6 +78,7 @@ def __init__(self, parent, modelcls=BooksModel): self.pubdate_delegate = PubDateDelegate(self) self.tags_delegate = CompleteDelegate(self, ',', 'all_tags') self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True) + self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True) self.series_delegate = TextDelegate(self) self.publisher_delegate = TextDelegate(self) self.text_delegate = TextDelegate(self) @@ -410,6 +411,7 @@ def set_database(self, db): self.save_state() self._model.set_database(db) self.tags_delegate.set_database(db) + self.cc_names_delegate.set_database(db) self.authors_delegate.set_database(db) self.series_delegate.set_auto_complete_function(db.all_series) self.publisher_delegate.set_auto_complete_function(db.all_publishers) @@ -431,12 +433,17 @@ def database_changed(self, db): self.setItemDelegateForColumn(cm.index(colhead), delegate) elif cc['datatype'] == 'comments': self.setItemDelegateForColumn(cm.index(colhead), self.cc_comments_delegate) - elif cc['datatype'] in ('text', 'series'): + elif cc['datatype'] == 'text': if cc['is_multiple']: - self.setItemDelegateForColumn(cm.index(colhead), self.tags_delegate) + if cc['display'].get('is_names', False): + self.setItemDelegateForColumn(cm.index(colhead), + self.cc_names_delegate) + else: + self.setItemDelegateForColumn(cm.index(colhead), + self.tags_delegate) else: self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate) - elif cc['datatype'] in ('int', 'float'): + elif cc['datatype'] in ('series', 'int', 'float'): self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate) elif 
cc['datatype'] == 'bool': self.setItemDelegateForColumn(cm.index(colhead), self.cc_bool_delegate) diff --git a/src/calibre/gui2/lrf_renderer/main.py b/src/calibre/gui2/lrf_renderer/main.py index 2acfd3c9a7..e68e04adcf 100644 --- a/src/calibre/gui2/lrf_renderer/main.py +++ b/src/calibre/gui2/lrf_renderer/main.py @@ -35,7 +35,7 @@ def run(self): self.stream = None if self.aborted: self.lrf = None - except Exception, err: + except Exception as err: self.lrf, self.stream = None, None self.exception = err self.formatted_traceback = traceback.format_exc() diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 976b679726..c67ec8c2b4 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -399,7 +399,7 @@ def main(args=sys.argv): if __name__ == '__main__': try: sys.exit(main()) - except Exception, err: + except Exception as err: if not iswindows: raise tb = traceback.format_exc() from PyQt4.QtGui import QErrorMessage diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index d5a8de7b67..635a037482 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -656,7 +656,7 @@ def select_cover(self, *args): try: cf = open(_file, "rb") cover = cf.read() - except IOError, e: + except IOError as e: d = error_dialog(self, _('Error reading file'), _("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e)) diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 461f56b60c..7a7f49dabf 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -88,7 +88,7 @@ def __init__(self, db, ids, get_covers, set_metadata=True, def run(self): try: self._run() - except Exception, e: + except Exception as e: self.exception = e self.tb = traceback.format_exc() diff --git a/src/calibre/gui2/metadata/single.py 
b/src/calibre/gui2/metadata/single.py index 3b6dd0e253..5b17b454e7 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -303,7 +303,7 @@ def apply_changes(self): return False self.books_to_refresh |= getattr(widget, 'books_to_refresh', set([])) - except IOError, err: + except IOError as err: if err.errno == 13: # Permission denied import traceback fname = err.filename if err.filename else 'file' diff --git a/src/calibre/gui2/notify.py b/src/calibre/gui2/notify.py index 501f7007eb..947d98f1a4 100644 --- a/src/calibre/gui2/notify.py +++ b/src/calibre/gui2/notify.py @@ -34,7 +34,7 @@ def __init__(self, server, path, interface): import dbus self.dbus = dbus self._notify = dbus.Interface(dbus.SessionBus().get_object(server, path), interface) - except Exception, err: + except Exception as err: self.ok = False self.err = str(err) diff --git a/src/calibre/gui2/preferences/conversion.py b/src/calibre/gui2/preferences/conversion.py index 8de9ee1661..b5240227d3 100644 --- a/src/calibre/gui2/preferences/conversion.py +++ b/src/calibre/gui2/preferences/conversion.py @@ -5,6 +5,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' +import importlib + from PyQt4.Qt import QIcon, Qt, QStringListModel, QVariant from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit @@ -104,8 +106,8 @@ def load_conversion_widgets(self): for plugin in output_format_plugins(): name = plugin.name.lower().replace(' ', '_') try: - output_widget = __import__('calibre.gui2.convert.'+name, - fromlist=[1]) + output_widget = importlib.import_module( + 'calibre.gui2.convert.'+name) pw = output_widget.PluginWidget self.conversion_widgets.append(pw) except ImportError: diff --git a/src/calibre/gui2/preferences/create_custom_column.py b/src/calibre/gui2/preferences/create_custom_column.py index cee34f150e..f476845f8b 100644 --- a/src/calibre/gui2/preferences/create_custom_column.py +++ 
b/src/calibre/gui2/preferences/create_custom_column.py @@ -63,7 +63,7 @@ def __init__(self, parent, editing, standard_colheads, standard_colnames): for col, name in [('isbn', _('ISBN')), ('formats', _('Formats')), ('last_modified', _('Modified Date')), ('yesno', _('Yes/No')), ('tags', _('Tags')), ('series', _('Series')), ('rating', - _('Rating'))]: + _('Rating')), ('people', _("People's names"))]: text += ' <a href="col:%s">%s</a>,'%(col, name) text = text[:-1] self.shortcuts.setText(text) @@ -125,6 +125,8 @@ def __init__(self, parent, editing, standard_colheads, standard_colnames): self.datatype_changed() if ct in ['text', 'composite', 'enumeration']: self.use_decorations.setChecked(c['display'].get('use_decorations', False)) + elif ct == '*text': + self.is_names.setChecked(c['display'].get('is_names', False)) self.exec_() def shortcut_activated(self, url): @@ -134,6 +136,7 @@ def shortcut_activated(self, url): 'tags' : 1, 'series': 3, 'rating': 8, + 'people': 1, }.get(which, 10)) self.column_name_box.setText(which) self.column_heading_box.setText({ @@ -143,7 +146,9 @@ def shortcut_activated(self, url): 'tags': _('My Tags'), 'series': _('My Series'), 'rating': _('My Rating'), - 'last_modified':_('Modified Date')}[which]) + 'last_modified':_('Modified Date'), + 'people': _('People')}[which]) + self.is_names.setChecked(which == 'people') if self.composite_box.isVisible(): self.composite_box.setText( { @@ -153,7 +158,6 @@ def shortcut_activated(self, url): }[which]) self.composite_sort_by.setCurrentIndex(2 if which == 'last_modified' else 0) - def datatype_changed(self, *args): try: col_type = self.column_types[self.column_type_box.currentIndex()]['datatype'] @@ -167,6 +171,7 @@ def datatype_changed(self, *args): for x in ('box', 'default_label', 'label'): getattr(self, 'enum_'+x).setVisible(col_type == 'enumeration') self.use_decorations.setVisible(col_type in ['text', 'composite', 'enumeration']) + self.is_names.setVisible(col_type == '*text') def accept(self): col 
= unicode(self.column_name_box.text()).strip() @@ -241,6 +246,8 @@ def accept(self): return self.simple_error('', _('The value "{0}" is in the ' 'list more than once').format(l[i])) display_dict = {'enum_values': l} + elif col_type == 'text' and is_multiple: + display_dict = {'is_names': self.is_names.isChecked()} if col_type in ['text', 'composite', 'enumeration']: display_dict['use_decorations'] = self.use_decorations.checkState() diff --git a/src/calibre/gui2/preferences/create_custom_column.ui b/src/calibre/gui2/preferences/create_custom_column.ui index 3290d3c846..619b0c6212 100644 --- a/src/calibre/gui2/preferences/create_custom_column.ui +++ b/src/calibre/gui2/preferences/create_custom_column.ui @@ -9,7 +9,7 @@ <rect> <x>0</x> <y>0</y> - <width>603</width> + <width>831</width> <height>344</height> </rect> </property> @@ -110,27 +110,37 @@ </item> <item> <widget class="QCheckBox" name="use_decorations"> - <property name="text"> - <string>Show checkmarks</string> - </property> <property name="toolTip"> <string>Show check marks in the GUI. Values of 'yes', 'checked', and 'true' will show a green check. Values of 'no', 'unchecked', and 'false' will show a red X. 
Everything else will show nothing.</string> </property> + <property name="text"> + <string>Show checkmarks</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="is_names"> + <property name="toolTip"> + <string>Check this box if this column contains names, like the authors column.</string> + </property> + <property name="text"> + <string>Contains names</string> + </property> </widget> </item> <item> <spacer name="horizontalSpacer_27"> - <property name="orientation"> - <enum>Qt::Horizontal</enum> - </property> <property name="sizePolicy"> <sizepolicy hsizetype="Expanding" vsizetype="Fixed"> <horstretch>10</horstretch> <verstretch>0</verstretch> </sizepolicy> </property> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> <property name="sizeHint" stdset="0"> <size> <width>20</width> @@ -241,25 +251,25 @@ Everything else will show nothing.</string> </item> <item> <widget class="QCheckBox" name="composite_make_category"> - <property name="text"> - <string>Show in tags browser</string> - </property> <property name="toolTip"> <string>If checked, this column will appear in the tags browser as a category</string> </property> + <property name="text"> + <string>Show in tags browser</string> + </property> </widget> </item> <item> <spacer name="horizontalSpacer_24"> - <property name="orientation"> - <enum>Qt::Horizontal</enum> - </property> <property name="sizePolicy"> <sizepolicy hsizetype="Expanding" vsizetype="Fixed"> <horstretch>10</horstretch> <verstretch>0</verstretch> </sizepolicy> </property> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> <property name="sizeHint" stdset="0"> <size> <width>20</width> diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index 206f2b97fb..a2d2236039 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -64,8 +64,9 @@ def genesis(self, gui): 
r('tags_browser_collapse_at', gprefs) choices = set([k for k in db.field_metadata.all_field_keys() - if db.field_metadata[k]['is_category'] and - db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']]) + if db.field_metadata[k]['is_category'] and + (db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']) and + not db.field_metadata[k]['display'].get('is_names', False)]) choices -= set(['authors', 'publisher', 'formats', 'news', 'identifiers']) choices |= set(['search']) self.opt_categories_using_hierarchy.update_items_cache(choices) diff --git a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py index e1dc6b03bd..8f2b084d76 100644 --- a/src/calibre/gui2/preferences/plugboard.py +++ b/src/calibre/gui2/preferences/plugboard.py @@ -251,7 +251,7 @@ def ok_clicked(self): if d != 0: try: validation_formatter.validate(s) - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid template'), '<p>'+_('The template %s is invalid:')%s + \ '<br>'+str(err), show=True) diff --git a/src/calibre/gui2/preferences/save_template.py b/src/calibre/gui2/preferences/save_template.py index 4c00a14c0f..96ca8c8945 100644 --- a/src/calibre/gui2/preferences/save_template.py +++ b/src/calibre/gui2/preferences/save_template.py @@ -57,7 +57,7 @@ def validate(self): return question_dialog(self, _('Constant template'), _('The template contains no {fields}, so all ' 'books will have the same name. 
Is this OK?')) - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid template'), '<p>'+_('The template %s is invalid:')%tmpl + \ '<br>'+str(err), show=True) diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 34fa3a8b10..6b1ce2f851 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -658,8 +658,7 @@ def category_data(self, role): def tag_data(self, role): tag = self.tag - if tag.category == 'authors' and \ - tweaks['categories_use_field_for_author_name'] == 'author_sort': + if tag.use_sort_as_name: name = tag.sort tt_author = True else: @@ -1275,6 +1274,7 @@ def process_one_node(category, state_map): # {{{ if len(components) == 0 or '.'.join(components) != tag.original_name: components = [tag.original_name] if (not tag.is_hierarchical) and (in_uc or + (fm['is_custom'] and fm['display'].get('is_names', False)) or key in ['authors', 'publisher', 'news', 'formats', 'rating'] or key not in self.db.prefs.get('categories_using_hierarchy', []) or len(components) == 1): diff --git a/src/calibre/gui2/viewer/dictionary.py b/src/calibre/gui2/viewer/dictionary.py index dad8d1821c..d5dd4d0a86 100644 --- a/src/calibre/gui2/viewer/dictionary.py +++ b/src/calibre/gui2/viewer/dictionary.py @@ -36,7 +36,7 @@ def define(self): def run(self): try: self.define() - except Exception, e: + except Exception as e: import traceback self.exception = e self.traceback = traceback.format_exc() diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index c570a6e159..ea0509b51a 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -97,7 +97,7 @@ def initialize(self, defaults=False): def do_test(self): try: pat = self.pattern() - except Exception, err: + except Exception as err: error_dialog(self, _('Invalid regular expression'), _('Invalid regular expression: %s')%err).exec_() return diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index 
c629b10b5d..a32347dc72 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -565,7 +565,7 @@ def move_library(oldloc, newloc, parent, callback_on_complete): # Try to load existing library at new location try: LibraryDatabase2(newloc) - except Exception, err: + except Exception as err: det = traceback.format_exc() error_dialog(parent, _('Invalid database'), _('<p>An invalid library already exists at ' @@ -577,7 +577,7 @@ def move_library(oldloc, newloc, parent, callback_on_complete): else: callback(newloc) return - except Exception, err: + except Exception as err: det = traceback.format_exc() error_dialog(parent, _('Could not move library'), unicode(err), det, show=True) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 19ef7e213c..e5864ceaaf 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -15,7 +15,7 @@ from calibre.utils.date import parse_date, now, UNDEFINED_DATE from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException -from calibre.ebooks.metadata import title_sort +from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre import prints @@ -1023,7 +1023,11 @@ def itervals(self, record): if val: sep = fm['is_multiple'] if sep: - val = sep.join(sorted(val.split(sep), + if fm['display'].get('is_names', False): + val = sep.join( + [author_to_author_sort(v) for v in val.split(sep)]) + else: + val = sep.join(sorted(val.split(sep), key=self.string_sort_key)) val = self.string_sort_key(val) diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index dec55f2b02..48960ac871 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -117,7 +117,7 @@ def adapt_text(x, d): if x is None: return [] if isinstance(x, (str, unicode, bytes)): - x = x.split(',') + x 
= x.split('&' if d['display'].get('is_names', False) else',') x = [y.strip() for y in x if y.strip()] x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, unicode) else y for y in x] @@ -482,8 +482,11 @@ def _set_custom(self, id_, val, label=None, num=None, append=False, set_val = val if data['is_multiple'] else [val] existing = getter() if not existing: - existing = [] - for x in set(set_val) - set(existing): + existing = set([]) + else: + existing = set(existing) + # preserve the order in set_val + for x in [v for v in set_val if v not in existing]: # normalized types are text and ratings, so we can do this check # to see if we need to re-add the value if not x: diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index e751d4d522..b23c8ff4a4 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -48,7 +48,7 @@ class Tag(object): def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None, tooltip=None, icon=None, category=None, id_set=None, - is_editable = True, is_searchable=True): + is_editable = True, is_searchable=True, use_sort_as_name=False): self.name = self.original_name = name self.id = id self.count = count @@ -59,6 +59,7 @@ def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None, self.id_set = id_set if id_set is not None else set([]) self.avg_rating = avg/2.0 if avg is not None else 0 self.sort = sort + self.use_sort_as_name = use_sort_as_name if self.avg_rating > 0: if tooltip: tooltip = tooltip + ': ' @@ -1323,6 +1324,11 @@ def get_categories(self, sort='name', ids=None, icon_map=None): for l in list: (id, val) = (l[0], l[1]) tids[category][val] = (id, '{0:05.2f}'.format(val)) + elif cat['datatype'] == 'text' and cat['is_multiple'] and \ + cat['display'].get('is_names', False): + for l in list: + (id, val) = (l[0], l[1]) + tids[category][val] = (id, author_to_author_sort(val)) else: for l in list: (id, val) = (l[0], l[1]) @@ -1480,11 +1486,20 @@ def 
get_categories(self, sort='name', ids=None, icon_map=None): reverse=True items.sort(key=kf, reverse=reverse) + if tweaks['categories_use_field_for_author_name'] == 'author_sort' and\ + (category == 'authors' or + (cat['display'].get('is_names', False) and + cat['is_custom'] and cat['is_multiple'] and + cat['datatype'] == 'text')): + use_sort_as_name = True + else: + use_sort_as_name = False is_editable = category not in ['news', 'rating'] categories[category] = [tag_class(formatter(r.n), count=r.c, id=r.id, avg=avgr(r), sort=r.s, icon=icon, tooltip=tooltip, category=category, - id_set=r.id_set, is_editable=is_editable) + id_set=r.id_set, is_editable=is_editable, + use_sort_as_name=use_sort_as_name) for r in items] #print 'end phase "tags list":', time.clock() - last, 'seconds' diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index 83d395dec5..dba6abbfa5 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -222,7 +222,7 @@ def start(self): # cherrypy.engine.signal_handler.subscribe() cherrypy.engine.block() - except Exception, e: + except Exception as e: self.exception = e finally: self.is_running = False diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index f1d9b9785c..895fbb06e9 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -15,7 +15,7 @@ prepare_string_for_xml from calibre.utils.ordered_dict import OrderedDict from calibre.utils.filenames import ascii_filename -from calibre.utils.config import prefs, tweaks +from calibre.utils.config import prefs from calibre.utils.icu import sort_key from calibre.utils.magick import Image from calibre.library.comments import comments_to_html @@ -155,8 +155,7 @@ def item(i): '<div>{1}</div>' '<div>{2}</div></div>') rating, rstring = render_rating(i.avg_rating, prefix) - if i.category == 'authors' and \ - tweaks['categories_use_field_for_author_name'] == 
'author_sort': + if i.use_sort_as_name: name = xml(i.sort) else: name = xml(i.name) @@ -696,7 +695,10 @@ def browse_get_book_args(self, mi, id_, add_category_links=False): xml(href, True), xml(val if len(dbtags) == 1 else tag.name), xml(key, True))) - join = ' & ' if key == 'authors' else ', ' + join = ' & ' if key == 'authors' or \ + (fm['is_custom'] and + fm['display'].get('is_names', False)) \ + else ', ' args[key] = join.join(vals) added_key = True if not added_key: diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 11ea2b951e..919f5a7969 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -169,7 +169,7 @@ def get_cover(self, id, thumbnail=False, thumb_width=60, thumb_height=80): return cover return save_cover_data_to(img, 'img.jpg', return_data=True, resize_to=(width, height)) - except Exception, err: + except Exception as err: import traceback cherrypy.log.error('Failed to generate cover:') cherrypy.log.error(traceback.print_exc()) diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py index e4de710c6a..3a6f918022 100644 --- a/src/calibre/library/server/main.py +++ b/src/calibre/library/server/main.py @@ -69,7 +69,7 @@ def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'): if pid > 0: # exit first parent sys.exit(0) - except OSError, e: + except OSError as e: print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror) sys.exit(1) @@ -84,7 +84,7 @@ def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'): if pid > 0: # exit from second parent sys.exit(0) - except OSError, e: + except OSError as e: print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror) sys.exit(1) diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py index e7fdffbbbb..bdd35c16f1 100644 --- a/src/calibre/library/server/opds.py +++ b/src/calibre/library/server/opds.py @@ -22,7 +22,6 @@ 
from calibre import guess_type, prepare_string_for_xml as xml from calibre.utils.icu import sort_key from calibre.utils.ordered_dict import OrderedDict -from calibre.utils.config import tweaks BASE_HREFS = { 0 : '/stanza', @@ -126,8 +125,7 @@ def CATALOG_ENTRY(item, item_kind, base_href, version, updated, count = (_('%d books') if item.count > 1 else _('%d book'))%item.count if ignore_count: count = '' - if item.category == 'authors' and \ - tweaks['categories_use_field_for_author_name'] == 'author_sort': + if item.use_sort_as_name: name = item.sort else: name = item.name diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index 2075ab5880..511106fe7b 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -193,7 +193,7 @@ def load_c_extensions(conn, debug=DEBUG): conn.load_extension(ext_path) conn.enable_load_extension(False) return True - except Exception, e: + except Exception as e: if debug: print 'Failed to load high performance sqlite C extension' print e @@ -247,14 +247,14 @@ def run(self): if func == 'dump': try: ok, res = True, tuple(self.conn.iterdump()) - except Exception, err: + except Exception as err: ok, res = False, (err, traceback.format_exc()) elif func == 'create_dynamic_filter': try: f = DynamicFilter(args[0]) self.conn.create_function(args[0], 1, f) ok, res = True, f - except Exception, err: + except Exception as err: ok, res = False, (err, traceback.format_exc()) else: bfunc = getattr(self.conn, func) @@ -263,7 +263,7 @@ def run(self): try: ok, res = True, bfunc(*args, **kwargs) break - except OperationalError, err: + except OperationalError as err: # Retry if unable to open db file e = str(err) if 'unable to open' not in e or i == 2: @@ -273,10 +273,10 @@ def run(self): reprlib.repr(kwargs)) raise time.sleep(0.5) - except Exception, err: + except Exception as err: ok, res = False, (err, traceback.format_exc()) self.results.put((ok, res)) - except Exception, err: + except Exception as err: 
self.unhandled_error = (err, traceback.format_exc()) class DatabaseException(Exception): diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 22f8af56c2..dfab13e3b8 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -3,7 +3,7 @@ ''' Post installation script for linux ''' -import sys, os, cPickle, textwrap, stat +import sys, os, cPickle, textwrap, stat, importlib from subprocess import check_call from calibre import __appname__, prints, guess_type @@ -59,7 +59,7 @@ shutil.rmtree(x) else: os.unlink(x) - except Exception, e: + except Exception as e: print 'Failed to delete', x print '\t', e @@ -285,7 +285,7 @@ def setup_completion(self): # {{{ complete -o nospace -C calibre-complete ebook-convert ''')) - except TypeError, err: + except TypeError as err: if 'resolve_entities' in str(err): print 'You need python-lxml >= 2.0.5 for calibre' sys.exit(1) @@ -309,7 +309,7 @@ def install_man_pages(self): # {{{ for src in entry_points['console_scripts']: prog, right = src.split('=') prog = prog.strip() - module = __import__(right.split(':')[0].strip(), fromlist=['a']) + module = importlib.import_module(right.split(':')[0].strip()) parser = getattr(module, 'option_parser', None) if parser is None: continue diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 948611f775..97ef32e9d4 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -493,7 +493,16 @@ Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. Thi I am getting a "Permission Denied" error? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|. You can get permission denied errors if you are using an SD card with write protect enabled. Or if you, or some program you used changed the file permissions of the files in question to read only. 
Or if there is a filesystem error on the device which caused your operating system to mount the filesystem in read only mode or mark a particular file as read only pending recovery. Or if the files have their owner set to a user other than you. Or if your file is open in another program. You will need to fix the underlying cause of the permissions error before resuming to use |app|. Read the error message carefully, see what file it points to and fix the permissions on that file. +A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|. + + * You can get permission denied errors if you are using an SD card with write protect enabled. + * If you, or some program you used changed the file permissions of the files in question to read only. + * If there is a filesystem error on the device which caused your operating system to mount the filesystem in read only mode or mark a particular file as read only pending recovery. + * If the files have their owner set to a user other than you. + * If your file is open in another program. + * If the file resides on a device, you may have reached the limit of a maximum of 256 files in the root of the device. In this case you need to reformat the device/sd card referred to in the error message with a FAT32 filesystem, or delete some files from the SD card/device memory. + +You will need to fix the underlying cause of the permissions error before resuming to use |app|. Read the error message carefully, see what file it points to and fix the permissions on that file. Can I have the comment metadata show up on my reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -510,7 +519,7 @@ You have two choices: How is |app| licensed? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|app| is licensed under the GNU General Public License v3 (an open source license). 
This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode <http://code.google.com/p/calibre-ebook/downloads/list>`_. You are free to use the results of conversions from |app| however you want. You cannot use code, libraries from |app| in your software without maing your software open source. For details, see `The GNU GPL v3 http://www.gnu.org/licenses/gpl.html`_. +|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode <http://code.google.com/p/calibre-ebook/downloads/list>`_. You are free to use the results of conversions from |app| however you want. You cannot use code, libraries from |app| in your software without making your software open source. For details, see `The GNU GPL v3 <http://www.gnu.org/licenses/gpl.html>`_. How do I run calibre from my USB stick? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/manual/news.rst b/src/calibre/manual/news.rst index d0838ccb0f..ed306a168e 100644 --- a/src/calibre/manual/news.rst +++ b/src/calibre/manual/news.rst @@ -137,7 +137,7 @@ to the recipe. Finally, lets replace some of the :term:`CSS` that we disabled ea With these additions, our recipe has become "production quality", indeed it is very close to the actual recipe used by |app| for the *BBC*, shown below: -.. literalinclude:: ../../../resources/recipes/bbc.recipe +.. literalinclude:: ../../../recipes/bbc.recipe This :term:`recipe` explores only the tip of the iceberg when it comes to the power of |app|. 
To explore more of the abilities of |app| we'll examine a more complex real life example in the next section. diff --git a/src/calibre/manual/sub_groups.rst b/src/calibre/manual/sub_groups.rst index c27b3581f8..e5a433dce9 100644 --- a/src/calibre/manual/sub_groups.rst +++ b/src/calibre/manual/sub_groups.rst @@ -105,8 +105,8 @@ After creating the saved search, you can use it as a restriction. .. image:: images/sg_restrict2.jpg :align: center - Useful Template Functions - ------------------------- +Useful Template Functions +------------------------- You might want to use the genre information in a template, such as with save to disk or send to device. The question might then be "How do I get the outermost genre name or names?" An |app| template function, subitems, is provided to make doing this easier. @@ -114,4 +114,4 @@ After creating the saved search, you can use it as a restriction. {#genre:subitems(0,1)||/}{title} - {authors} -See :ref:`The |app| template language <templatelangcalibre>` for more information templates and the subitem function. \ No newline at end of file +See :ref:`The |app| template language <templatelangcalibre>` for more information templates and the subitem function. 
diff --git a/src/calibre/utils/Zeroconf.py b/src/calibre/utils/Zeroconf.py index f4a7119d16..fbb9b4e71f 100755 --- a/src/calibre/utils/Zeroconf.py +++ b/src/calibre/utils/Zeroconf.py @@ -863,7 +863,7 @@ def run(self): for socket in rr: try: self.readers[socket].handle_read() - except NonLocalNameException, err: + except NonLocalNameException as err: print err except UnicodeDecodeError: if DEBUG: diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index 740e67bee8..2e40275beb 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -316,7 +316,7 @@ def safe_format(self, fmt, kwargs, error_value, book): self.locals = {} try: ans = self.vformat(fmt, [], kwargs).strip() - except Exception, e: + except Exception as e: if DEBUG: traceback.print_exc() ans = error_value + ' ' + e.message diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py index e187235a9e..9594f64ae4 100644 --- a/src/calibre/utils/ipc/worker.py +++ b/src/calibre/utils/ipc/worker.py @@ -6,7 +6,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import os, cPickle, sys +import os, cPickle, sys, importlib from multiprocessing.connection import Client from threading import Thread from Queue import Queue @@ -75,7 +75,7 @@ def run(self): def get_func(name): module, func, notification = PARALLEL_FUNCS[name] - module = __import__(module, fromlist=[1]) + module = importlib.import_module(module) func = getattr(module, func) return func, notification diff --git a/src/calibre/utils/lock.py b/src/calibre/utils/lock.py index 5098c78f90..0b66be963b 100644 --- a/src/calibre/utils/lock.py +++ b/src/calibre/utils/lock.py @@ -32,7 +32,7 @@ def __init__(self, path, timeout=20): None, #No template file ) break - except pywintypes.error, err: + except pywintypes.error as err: if getattr(err, 'args', [-1])[0] in (0x20, 0x21): time.sleep(1) continue diff --git a/src/calibre/utils/pdftk.py 
b/src/calibre/utils/pdftk.py index 1263b60306..f4fcb8a2e3 100644 --- a/src/calibre/utils/pdftk.py +++ b/src/calibre/utils/pdftk.py @@ -56,7 +56,7 @@ def set_metadata(stream, mi): try: p.wait() break - except OSError, e: + except OSError as e: if e.errno == errno.EINTR: continue else: diff --git a/src/calibre/utils/smtp.py b/src/calibre/utils/smtp.py index 744021f911..81936a8f71 100644 --- a/src/calibre/utils/smtp.py +++ b/src/calibre/utils/smtp.py @@ -76,7 +76,7 @@ def sendmail_direct(from_, to, msg, timeout, localhost, verbose, s.connect(host, 25) s.sendmail(from_, [to], msg) return s.quit() - except Exception, e: + except Exception as e: last_error, last_traceback = e, traceback.format_exc() if last_error is not None: print last_traceback diff --git a/src/calibre/web/feeds/feedparser.py b/src/calibre/web/feeds/feedparser.py index ead9207b70..99c3e09666 100755 --- a/src/calibre/web/feeds/feedparser.py +++ b/src/calibre/web/feeds/feedparser.py @@ -6,12 +6,11 @@ Visit http://feedparser.org/ for the latest version Visit http://feedparser.org/docs/ for the latest documentation -Required: Python 2.1 or later -Recommended: Python 2.3 or later +Required: Python 2.4 or later Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/> """ -__version__ = "4.2-pre-" + "$Revision: 316 $"[11:14] + "-svn" +__version__ = "5.0.1" __license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -42,14 +41,14 @@ "Kevin Marks <http://epeus.blogspot.com/>", "Sam Ruby <http://intertwingly.net/>", "Ade Oshineye <http://blog.oshineye.com/>", - "Martin Pool <http://sourcefrog.net/>"] + "Martin Pool <http://sourcefrog.net/>", + "Kurt McKee <http://kurtmckee.org/>"] _debug = 0 # HTTP "User-Agent" header to send to servers when downloading feeds. # If you are embedding feedparser in a larger application, you should # change this to your application name and URL. 
-USER_AGENT = 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4' # Changed by Kovid - +USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11' # Changed by Kovid # HTTP "Accept" header to send to servers when downloading feeds. If you don't # want to send an Accept header, set this to None. ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" @@ -76,12 +75,73 @@ # HTML content, set this to 1. SANITIZE_HTML = 1 -# ---------- required modules (should come with any Python distribution) ---------- -import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2 +# ---------- Python 3 modules (make it work if possible) ---------- try: - from cStringIO import StringIO as _StringIO + import rfc822 +except ImportError: + from email import _parseaddr as rfc822 + +try: + # Python 3.1 introduces bytes.maketrans and simultaneously + # deprecates string.maketrans; use bytes.maketrans if possible + _maketrans = bytes.maketrans +except (NameError, AttributeError): + import string + _maketrans = string.maketrans + +# base64 support for Atom feeds that contain embedded binary data +try: + import base64, binascii + # Python 3.1 deprecates decodestring in favor of decodebytes + _base64decode = getattr(base64, 'decodebytes', base64.decodestring) except: - from StringIO import StringIO as _StringIO + base64 = binascii = None + +def _s2bytes(s): + # Convert a UTF-8 str to bytes if the interpreter is Python 3 + try: + return bytes(s, 'utf8') + except (NameError, TypeError): + # In Python 2.5 and below, bytes doesn't exist (NameError) + # In Python 2.6 and above, bytes and str are the same (TypeError) + return s + +def _l2bytes(l): + # Convert a list of ints to bytes if the interpreter is Python 3 + try: + if bytes is not str: + # In Python 2.6 and above, this call won't raise an exception + # 
but it will return bytes([65]) as '[65]' instead of 'A' + return bytes(l) + raise NameError + except NameError: + return ''.join(map(chr, l)) + +# If you want feedparser to allow all URL schemes, set this to () +# List culled from Python's urlparse documentation at: +# http://docs.python.org/library/urlparse.html +# as well as from "URI scheme" at Wikipedia: +# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme +# Many more will likely need to be added! +ACCEPTABLE_URI_SCHEMES = ( + 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto', + 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp', + 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais', + # Additional common-but-unofficial schemes + 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs', + 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg', +) +#ACCEPTABLE_URI_SCHEMES = () + +# ---------- required modules (should come with any Python distribution) ---------- +import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime +try: + from io import BytesIO as _StringIO +except ImportError: + try: + from cStringIO import StringIO as _StringIO + except: + from StringIO import StringIO as _StringIO # ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- @@ -114,12 +174,6 @@ def _xmlescape(data,entities={}): data = data.replace(char, entity) return data -# base64 support for Atom feeds that contain embedded binary data -try: - import base64, binascii -except: - base64 = binascii = None - # cjkcodecs and iconv_codec provide support for more character encodings. 
# Both are available from http://cjkpython.i18n.org/ try: @@ -172,17 +226,27 @@ class UndeclaredNamespace(Exception): pass sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') sgmllib.special = re.compile('<!') -sgmllib.charref = re.compile('&#(\d+|x[0-9a-fA-F]+);') +sgmllib.charref = re.compile('&#(\d+|[xX][0-9a-fA-F]+);') if sgmllib.endbracket.search(' <').start(0): - class EndBracketMatch: - endbracket = re.compile('''([^'"<>]|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') + class EndBracketRegEx: + def __init__(self): + # Overriding the built-in sgmllib.endbracket regex allows the + # parser to find angle brackets embedded in element attributes. + self.endbracket = re.compile('''([^'"<>]|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') def search(self,string,index=0): - self.match = self.endbracket.match(string,index) - if self.match: return self - def start(self,n): + match = self.endbracket.match(string,index) + if match is not None: + # Returning a new object in the calling thread's context + # resolves a thread-safety. 
+ return EndBracketMatch(match) + return None + class EndBracketMatch: + def __init__(self, match): + self.match = match + def start(self, n): return self.match.end(n) - sgmllib.endbracket = EndBracketMatch() + sgmllib.endbracket = EndBracketRegEx() SUPPORTED_VERSIONS = {'': 'unknown', 'rss090': 'RSS 0.90', @@ -220,7 +284,7 @@ class FeedParserDict(UserDict): 'guid': 'id', 'date': 'updated', 'date_parsed': 'updated_parsed', - 'description': ['subtitle', 'summary'], + 'description': ['summary', 'subtitle'], 'url': ['href'], 'modified': 'updated', 'modified_parsed': 'updated_parsed', @@ -245,9 +309,9 @@ def __getitem__(self, key): realkey = self.keymap.get(key, key) if type(realkey) == types.ListType: for k in realkey: - if UserDict.has_key(self, k): + if UserDict.__contains__(self, k): return UserDict.__getitem__(self, k) - if UserDict.has_key(self, key): + if UserDict.__contains__(self, key): return UserDict.__getitem__(self, key) return UserDict.__getitem__(self, realkey) @@ -272,9 +336,12 @@ def setdefault(self, key, value): def has_key(self, key): try: - return hasattr(self, key) or UserDict.has_key(self, key) + return hasattr(self, key) or UserDict.__contains__(self, key) except AttributeError: return False + # This alias prevents the 2to3 tool from changing the semantics of the + # __contains__ function below and exhausting the maximum recursion depth + __has_key = has_key def __getattr__(self, key): try: @@ -294,7 +361,7 @@ def __setattr__(self, key, value): return self.__setitem__(key, value) def __contains__(self, key): - return self.has_key(key) + return self.__has_key(key) def zopeCompatibilityHack(): global FeedParserDict @@ -327,9 +394,8 @@ def _ebcdic_to_ascii(s): 92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249, 48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255 ) - import string - _ebcdic_to_ascii_map = string.maketrans( \ - ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + _ebcdic_to_ascii_map = _maketrans( \ + _l2bytes(range(256)), 
_l2bytes(emap)) return s.translate(_ebcdic_to_ascii_map) _cp1252 = { @@ -483,6 +549,10 @@ def unknown_starttag(self, tag, attrs): # normalize attrs attrs = [(k.lower(), v) for k, v in attrs] attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] + # the sgml parser doesn't handle entities in attributes, but + # strict xml parsers do -- account for this difference + if isinstance(self, _LooseFeedParser): + attrs = [(k, v.replace('&', '&')) for k, v in attrs] # track xml:base and xml:lang attrsD = dict(attrs) @@ -492,7 +562,12 @@ def unknown_starttag(self, tag, attrs): baseuri = unicode(baseuri, self.encoding) except: baseuri = unicode(baseuri, 'iso-8859-1') - self.baseuri = _urljoin(self.baseuri, baseuri) + # ensure that self.baseuri is always an absolute URI that + # uses a whitelisted URI scheme (e.g. not `javscript:`) + if self.baseuri: + self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri + else: + self.baseuri = _urljoin(self.baseuri, baseuri) lang = attrsD.get('xml:lang', attrsD.get('lang')) if lang == '': # xml:lang could be explicitly set to '', we need to capture that @@ -671,7 +746,7 @@ def parse_declaration(self, i): def mapContentType(self, contentType): contentType = contentType.lower() - if contentType == 'text': + if contentType == 'text' or contentType == 'plain': contentType = 'text/plain' elif contentType == 'html': contentType = 'text/html' @@ -735,6 +810,11 @@ def pop(self, element, stripWhitespace=1): else: pieces = pieces[1:-1] + # Ensure each piece is a str for Python 3 + for (i, v) in enumerate(pieces): + if not isinstance(v, basestring): + pieces[i] = v.decode('utf-8') + output = ''.join(pieces) if stripWhitespace: output = output.strip() @@ -743,11 +823,15 @@ def pop(self, element, stripWhitespace=1): # decode base64 content if base64 and self.contentparams.get('base64', 0): try: - output = base64.decodestring(output) + output = _base64decode(output) except binascii.Error: pass except 
binascii.Incomplete: pass + except TypeError: + # In Python 3, base64 takes and outputs bytes, not str + # This may not be the most correct way to accomplish this + output = _base64decode(output.encode('utf-8')).decode('utf-8') # resolve relative URIs if (element in self.can_be_relative_uri) and output: @@ -805,7 +889,7 @@ def pop(self, element, stripWhitespace=1): # address common error where people take data that is already # utf-8, presume that it is iso-8859-1, and re-encode it. - if self.encoding=='utf-8' and type(output) == type(u''): + if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''): try: output = unicode(output.encode('iso-8859-1'), 'utf-8') except: @@ -830,9 +914,14 @@ def pop(self, element, stripWhitespace=1): contentparams['value'] = output self.entries[-1][element].append(contentparams) elif element == 'link': - self.entries[-1][element] = output - if output: - self.entries[-1]['links'][-1]['href'] = output + if not self.inimage: + # query variables in urls in link elements are improperly + # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're + # unhandled character references. fix this special case. + output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) + self.entries[-1][element] = output + if output: + self.entries[-1]['links'][-1]['href'] = output else: if element == 'description': element = 'summary' @@ -847,6 +936,9 @@ def pop(self, element, stripWhitespace=1): element = 'subtitle' context[element] = output if element == 'link': + # fix query variables; see above for the explanation + output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) + context[element] = output context['links'][-1]['href'] = output elif self.incontent: contentparams = copy.deepcopy(self.contentparams) @@ -874,21 +966,21 @@ def popContent(self, tag): # text, but this is routinely ignored. This is an attempt to detect # the most common cases. 
As false positives often result in silent # data loss, this function errs on the conservative side. - def lookslikehtml(self, str): + def lookslikehtml(self, s): if self.version.startswith('atom'): return if self.contentparams.get('type','text/html') != 'text/plain': return # must have a close tag or a entity reference to qualify - if not (re.search(r'</(\w+)>',str) or re.search("&#?\w+;",str)): return + if not (re.search(r'</(\w+)>',s) or re.search("&#?\w+;",s)): return # all tags must be in a restricted subset of valid HTML tags if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements, - re.findall(r'</?(\w+)',str)): return + re.findall(r'</?(\w+)',s)): return # all entities must have been defined as valid HTML entities from htmlentitydefs import entitydefs if filter(lambda e: e not in entitydefs.keys(), - re.findall(r'&(\w+);',str)): return + re.findall(r'&(\w+);',s)): return return 1 @@ -929,9 +1021,12 @@ def _itsAnHrefDamnIt(self, attrsD): attrsD['href'] = href return attrsD - def _save(self, key, value): + def _save(self, key, value, overwrite=False): context = self._getContext() - context.setdefault(key, value) + if overwrite: + context[key] = value + else: + context.setdefault(key, value) def _start_rss(self, attrsD): versionmap = {'0.91': 'rss091u', @@ -988,7 +1083,8 @@ def _end_channel(self): def _start_image(self, attrsD): context = self._getContext() - context.setdefault('image', FeedParserDict()) + if not self.inentry: + context.setdefault('image', FeedParserDict()) self.inimage = 1 self.hasTitle = 0 self.push('image', 0) @@ -1013,6 +1109,10 @@ def _end_textinput(self): def _start_author(self, attrsD): self.inauthor = 1 self.push('author', 1) + # Append a new FeedParserDict when expecting an author + context = self._getContext() + context.setdefault('authors', []) + context['authors'].append(FeedParserDict()) _start_managingeditor = _start_author _start_dc_author = _start_author _start_dc_creator = _start_author @@ -1147,6 +1247,8 @@ def 
_save_author(self, key, value, prefix='author'): context.setdefault(prefix + '_detail', FeedParserDict()) context[prefix + '_detail'][key] = value self._sync_author_detail() + context.setdefault('authors', [FeedParserDict()]) + context['authors'][-1][key] = value def _save_contributor(self, key, value): context = self._getContext() @@ -1252,7 +1354,7 @@ def _start_published(self, attrsD): def _end_published(self): value = self.pop('published') - self._save('published_parsed', _parse_date(value)) + self._save('published_parsed', _parse_date(value), overwrite=True) _end_dcterms_issued = _end_published _end_issued = _end_published @@ -1262,15 +1364,17 @@ def _start_updated(self, attrsD): _start_dcterms_modified = _start_updated _start_pubdate = _start_updated _start_dc_date = _start_updated + _start_lastbuilddate = _start_updated def _end_updated(self): value = self.pop('updated') parsed_value = _parse_date(value) - self._save('updated_parsed', parsed_value) + self._save('updated_parsed', parsed_value, overwrite=True) _end_modified = _end_updated _end_dcterms_modified = _end_updated _end_pubdate = _end_updated _end_dc_date = _end_updated + _end_lastbuilddate = _end_updated def _start_created(self, attrsD): self.push('created', 1) @@ -1278,14 +1382,14 @@ def _start_created(self, attrsD): def _end_created(self): value = self.pop('created') - self._save('created_parsed', _parse_date(value)) + self._save('created_parsed', _parse_date(value), overwrite=True) _end_dcterms_created = _end_created def _start_expirationdate(self, attrsD): self.push('expired', 1) def _end_expirationdate(self): - self._save('expired_parsed', _parse_date(self.pop('expired'))) + self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True) def _start_cc_license(self, attrsD): context = self._getContext() @@ -1334,6 +1438,10 @@ def _start_category(self, attrsD): _start_dc_subject = _start_category _start_keywords = _start_category + def _start_media_category(self, attrsD): + 
attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema') + self._start_category(attrsD) + def _end_itunes_keywords(self): for term in self.pop('itunes_keywords').split(): self._addTag(term, 'http://www.itunes.com/', None) @@ -1354,6 +1462,7 @@ def _end_category(self): _end_dc_subject = _end_category _end_keywords = _end_category _end_itunes_category = _end_category + _end_media_category = _end_category def _start_cloud(self, attrsD): self._getContext()['cloud'] = FeedParserDict(attrsD) @@ -1368,11 +1477,10 @@ def _start_link(self, attrsD): attrsD = self._itsAnHrefDamnIt(attrsD) if attrsD.has_key('href'): attrsD['href'] = self.resolveURI(attrsD['href']) - if attrsD.get('rel')=='enclosure' and not context.get('id'): - context['id'] = attrsD.get('href') expectingText = self.infeed or self.inentry or self.insource context.setdefault('links', []) - context['links'].append(FeedParserDict(attrsD)) + if not (self.inentry and self.inimage): + context['links'].append(FeedParserDict(attrsD)) if attrsD.has_key('href'): expectingText = 0 if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): @@ -1498,9 +1606,6 @@ def _start_enclosure(self, attrsD): context = self._getContext() attrsD['rel']='enclosure' context.setdefault('links', []).append(FeedParserDict(attrsD)) - href = attrsD.get('href') - if href and not context.get('id'): - context['id'] = href def _start_source(self, attrsD): if 'url' in attrsD: @@ -1537,10 +1642,10 @@ def _start_content_encoded(self, attrsD): _start_fullitem = _start_content_encoded def _end_content(self): - copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) + copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) value = self.popContent('content') - if copyToDescription: - self._save('description', value) + if copyToSummary: + self._save('summary', value) _end_body = 
_end_content _end_xhtml_body = _end_content @@ -1550,7 +1655,8 @@ def _end_content(self): def _start_itunes_image(self, attrsD): self.push('itunes_image', 0) - self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) + if attrsD.get('href'): + self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) _start_itunes_link = _start_itunes_image def _end_itunes_block(self): @@ -1559,7 +1665,10 @@ def _end_itunes_block(self): def _end_itunes_explicit(self): value = self.pop('itunes_explicit', 0) - self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0 + # Convert 'yes' -> True, 'clean' to False, and any other value to None + # False and None both evaluate as False, so the difference can be ignored + # by applications that only need to know if the content is explicit. + self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0] def _start_media_content(self, attrsD): context = self._getContext() @@ -1588,6 +1697,17 @@ def _end_media_player(self): context = self._getContext() context['media_player']['content'] = value + def _start_newlocation(self, attrsD): + self.push('newlocation', 1) + + def _end_newlocation(self): + url = self.pop('newlocation') + context = self._getContext() + # don't set newlocation if the context isn't right + if context is not self.feeddata: + return + context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip()) + if _XML_AVAILABLE: class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): def __init__(self, baseuri, baselang, encoding): @@ -1689,9 +1809,9 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser): 'source', 'track', 'wbr' ] - def __init__(self, encoding, type): + def __init__(self, encoding, _type): self.encoding = encoding - self.type = type + self._type = _type if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) sgmllib.SGMLParser.__init__(self) @@ -1708,7 +1828,7 @@ def 
_shorttag_replace(self, match): def parse_starttag(self,i): j=sgmllib.SGMLParser.parse_starttag(self, i) - if self.type == 'application/xhtml+xml': + if self._type == 'application/xhtml+xml': if j>2 and self.rawdata[j-2:j]=='/>': self.unknown_endtag(self.lasttag) return j @@ -1719,8 +1839,14 @@ def feed(self, data): data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) data = data.replace(''', "'") data = data.replace('"', '"') - if self.encoding and type(data) == type(u''): - data = data.encode(self.encoding) + try: + bytes + if bytes is str: + raise NameError + self.encoding = self.encoding + '_INVALID_PYTHON_3' + except NameError: + if self.encoding and type(data) == type(u''): + data = data.encode(self.encoding) sgmllib.SGMLParser.feed(self, data) sgmllib.SGMLParser.close(self) @@ -1749,7 +1875,11 @@ def unknown_starttag(self, tag, attrs): value = unicode(value, self.encoding) except: value = unicode(value, 'iso-8859-1') - uattrs.append((unicode(key, self.encoding), value)) + try: + # Currently, in Python 3 the key is already a str, and cannot be decoded again + uattrs.append((unicode(key, self.encoding), value)) + except TypeError: + uattrs.append((key, value)) strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]) if self.encoding: try: @@ -1840,6 +1970,14 @@ def output(self): '''Return processed HTML as a single string''' return ''.join([str(p) for p in self.pieces]) + def parse_declaration(self, i): + try: + return sgmllib.SGMLParser.parse_declaration(self, i) + except sgmllib.SGMLParseError: + # escape the doctype declaration and continue parsing + self.handle_data('<') + return i+1 + class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): def __init__(self, baseuri, baselang, encoding, entities): sgmllib.SGMLParser.__init__(self) @@ -2019,10 +2157,10 @@ def findVCards(self, elmRoot, bAgentParsing=0): arLines = [] def processSingleString(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, 
bAutoEscape=1) + sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) if sValue: arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) - return sValue or '' + return sValue or u'' def processSingleURI(sProperty): sValue = self.getPropertyValue(elmCard, sProperty, self.URI) @@ -2071,8 +2209,8 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): sAgentValue = sAgentValue.replace(';', '\\;') if sAgentValue: arLines.append(self.vcardFold('AGENT:' + sAgentValue)) - elmAgent['class'] = '' - elmAgent.contents = [] + # Completely remove the agent element from the parse tree + elmAgent.extract() else: sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); if sAgentValue: @@ -2219,8 +2357,8 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None): processSingleURI('key') if arLines: - arLines = ['BEGIN:vCard','VERSION:3.0'] + arLines + ['END:vCard'] - sVCards += '\n'.join(arLines) + '\n' + arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] + sVCards += u'\n'.join(arLines) + u'\n' return sVCards.strip() @@ -2277,7 +2415,12 @@ def findXFN(self): def _parseMicroformats(htmlSource, baseURI, encoding): if not BeautifulSoup: return if _debug: sys.stderr.write('entering _parseMicroformats\n') - p = _MicroformatsParser(htmlSource, baseURI, encoding) + try: + p = _MicroformatsParser(htmlSource, baseURI, encoding) + except UnicodeEncodeError: + # sgmllib throws this exception when performing lookups of tags + # with non-ASCII characters in them. 
+ return p.vcard = p.findVCards(p.document) p.findTags() p.findEnclosures() @@ -2311,12 +2454,12 @@ class _RelativeURIResolver(_BaseHTMLProcessor): ('q', 'cite'), ('script', 'src')] - def __init__(self, baseuri, encoding, type): - _BaseHTMLProcessor.__init__(self, encoding, type) + def __init__(self, baseuri, encoding, _type): + _BaseHTMLProcessor.__init__(self, encoding, _type) self.baseuri = baseuri def resolveURI(self, uri): - return _urljoin(self.baseuri, uri.strip()) + return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) def unknown_starttag(self, tag, attrs): if _debug: @@ -2325,27 +2468,44 @@ def unknown_starttag(self, tag, attrs): attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) -def _resolveRelativeURIs(htmlSource, baseURI, encoding, type): +def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type): if _debug: sys.stderr.write('entering _resolveRelativeURIs\n') - p = _RelativeURIResolver(baseURI, encoding, type) + p = _RelativeURIResolver(baseURI, encoding, _type) p.feed(htmlSource) return p.output() +def _makeSafeAbsoluteURI(base, rel=None): + # bail if ACCEPTABLE_URI_SCHEMES is empty + if not ACCEPTABLE_URI_SCHEMES: + return _urljoin(base, rel or u'') + if not base: + return rel or u'' + if not rel: + scheme = urlparse.urlparse(base)[0] + if not scheme or scheme in ACCEPTABLE_URI_SCHEMES: + return base + return u'' + uri = _urljoin(base, rel) + if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES: + return u'' + return uri + class _HTMLSanitizer(_BaseHTMLProcessor): - acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'article', - 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas', - 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command', - 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', - 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', 
'figure', 'footer', - 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', - 'img', 'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', - 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', - 'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', - 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', 'sub', - 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', - 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] + acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', + 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', + 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', + 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', + 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', + 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', + 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', + 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', + 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', + 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', + 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', + 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', @@ -2469,7 +2629,7 @@ def unknown_starttag(self, tag, attrs): self.unacceptablestack += 1 # add implicit namespaces to html5 inline svg/mathml - if self.type.endswith('html'): + if self._type.endswith('html'): if not dict(attrs).get('xmlns'): if tag=='svg': attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) @@ -2514,6 +2674,9 @@ def unknown_starttag(self, tag, attrs): for key, value in self.normalize_attrs(attrs): if key in 
acceptable_attributes: key=keymap.get(key,key) + # make sure the uri uses an acceptable uri scheme + if key == u'href': + value = _makeSafeAbsoluteURI(value) clean_attrs.append((key,value)) elif key=='style': clean_value = self.sanitize_style(value) @@ -2569,9 +2732,22 @@ def sanitize_style(self, style): return ' '.join(clean) + def parse_comment(self, i, report=1): + ret = _BaseHTMLProcessor.parse_comment(self, i, report) + if ret >= 0: + return ret + # if ret == -1, this may be a malicious attempt to circumvent + # sanitization, or a page-destroying unclosed comment + match = re.compile(r'--[^>]*>').search(self.rawdata, i+4) + if match: + return match.end() + # unclosed comment; deliberately fail to handle_data() + return len(self.rawdata) -def _sanitizeHTML(htmlSource, encoding, type): - p = _HTMLSanitizer(encoding, type) + +def _sanitizeHTML(htmlSource, encoding, _type): + p = _HTMLSanitizer(encoding, _type) + htmlSource = htmlSource.replace('<![CDATA[', '<![CDATA[') p.feed(htmlSource) data = p.output() if TIDY_MARKUP: @@ -2654,7 +2830,7 @@ def http_error_401(self, req, fp, code, msg, headers): try: assert sys.version.split()[0] >= '2.3.3' assert base64 != None - user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':') + user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':') realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] self.add_password(realm, host, user, passw) retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) @@ -2663,7 +2839,7 @@ def http_error_401(self, req, fp, code, msg, headers): except: return self.http_error_default(req, fp, code, msg, headers) -def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, extra_headers): +def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers): """URL, filename, or string --> stream This function lets you define parsers that take any 
input source @@ -2691,7 +2867,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h If handlers is supplied, it is a list of handlers used to build a urllib2 opener. - if extra_headers is supplied it is a dictionary of HTTP request headers + if request_headers is supplied it is a dictionary of HTTP request headers that will override the values generated by FeedParser. """ @@ -2701,7 +2877,12 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h if url_file_stream_or_string == '-': return sys.stdin - if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp'): + if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'): + # Deal with the feed URI scheme + if url_file_stream_or_string.startswith('feed:http'): + url_file_stream_or_string = url_file_stream_or_string[5:] + elif url_file_stream_or_string.startswith('feed:'): + url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:] if not agent: agent = USER_AGENT # test for inline user:password for basic auth @@ -2713,20 +2894,20 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h user_passwd, realhost = urllib.splituser(realhost) if user_passwd: url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) - auth = base64.encodestring(user_passwd).strip() + auth = base64.standard_b64encode(user_passwd).strip() # iri support try: if isinstance(url_file_stream_or_string,unicode): - url_file_stream_or_string = url_file_stream_or_string.encode('idna') + url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8') else: - url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna') + url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8') except: pass # try to open with urllib2 (to use optional headers) - request = _build_urllib2_request(url_file_stream_or_string, 
agent, etag, modified, referrer, auth, extra_headers) - opener = apply(urllib2.build_opener, tuple([_FeedURLHandler()] + handlers)) + request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers) + opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()])) opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent try: return opener.open(request) @@ -2735,20 +2916,22 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h # try to open with native open function (if url_file_stream_or_string is a filename) try: - return open(url_file_stream_or_string) + return open(url_file_stream_or_string, 'rb') except: pass # treat url_file_stream_or_string as string return _StringIO(str(url_file_stream_or_string)) -def _build_urllib2_request(url, agent, etag, modified, referrer, auth, extra_headers): +def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_headers): request = urllib2.Request(url) request.add_header('User-Agent', agent) if etag: request.add_header('If-None-Match', etag) if type(modified) == type(''): modified = _parse_date(modified) + elif isinstance(modified, datetime.datetime): + modified = modified.utctimetuple() if modified: # format into an RFC 1123-compliant timestamp. 
We can't use # time.strftime() since the %a and %b directives can be affected @@ -2773,7 +2956,7 @@ def _build_urllib2_request(url, agent, etag, modified, referrer, auth, extra_hea request.add_header('Accept', ACCEPT_HEADER) # use this for whatever -- cookies, special headers, etc # [('Cookie','Something'),('x-special-header','Another Value')] - for header_name, header_value in extra_headers.items(): + for header_name, header_value in request_headers.items(): request.add_header(header_name, header_value) request.add_header('A-IM', 'feed') # RFC 3229 support return request @@ -2811,9 +2994,15 @@ def registerDateHandler(func): + r'(\.(?P<fracsecond>\d+))?' + r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?' for tmpl in _iso8601_tmpl] -del tmpl +try: + del tmpl +except NameError: + pass _iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] -del regex +try: + del regex +except NameError: + pass def _parse_date_iso8601(dateString): '''Parse a variety of ISO-8601-compatible formats like 20040105''' m = None @@ -2887,7 +3076,7 @@ def _parse_date_iso8601(dateString): # Python's time.mktime() is a wrapper around the ANSI C mktime(3c) # which is guaranteed to normalize d/m/y/h/m/s. # Many implementations have bugs, but we'll pretend they don't. - return time.localtime(time.mktime(tm)) + return time.localtime(time.mktime(tuple(tm))) registerDateHandler(_parse_date_iso8601) # 8-bit date handling routines written by ytrewq1. @@ -3128,12 +3317,12 @@ def __extract_tzd(m): __date_re = ('(?P<year>\d\d\d\d)' '(?:(?P<dsep>-|)' - '(?:(?P<julian>\d\d\d)' - '|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?') + '(?:(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?' + '|(?P<julian>\d\d\d)))?') __tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)' __tzd_rx = re.compile(__tzd_re) __time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)' - '(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' + '(?:(?P=tsep)(?P<seconds>\d\d)(?:[.,]\d+)?)?' 
+ __tzd_re) __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) __datetime_rx = re.compile(__datetime_re) @@ -3157,6 +3346,10 @@ def _parse_date_rfc822(dateString): else: data.append('') dateString = " ".join(data) + # Account for the Etc/GMT timezone by stripping 'Etc/' + elif len(data) == 5 and data[4].lower().startswith('etc/'): + data[4] = data[4][4:] + dateString = " ".join(data) if len(data) < 5: dateString += ' 00:00:00 GMT' tm = rfc822.parsedate_tz(dateString) @@ -3194,7 +3387,7 @@ def _parse_date(dateString): raise ValueError map(int, date9tuple) return date9tuple - except Exception, e: + except Exception as e: if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) pass return None @@ -3261,59 +3454,59 @@ def _parseHTTPContentType(content_type): sniffed_xml_encoding = '' xml_encoding = '' true_encoding = '' - http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type')) + http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type'))) # Must sniff for non-ASCII-compatible character encodings before # searching for XML declaration. 
This heuristic is defined in # section F of the XML specification: # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info try: - if xml_data[:4] == '\x4c\x6f\xa7\x94': + if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]): # EBCDIC xml_data = _ebcdic_to_ascii(xml_data) - elif xml_data[:4] == '\x00\x3c\x00\x3f': + elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]): # UTF-16BE sniffed_xml_encoding = 'utf-16be' xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'): + elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): # UTF-16BE with BOM sniffed_xml_encoding = 'utf-16be' xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x3f\x00': + elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]): # UTF-16LE sniffed_xml_encoding = 'utf-16le' xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and (xml_data[2:4] != '\x00\x00'): + elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): # UTF-16LE with BOM sniffed_xml_encoding = 'utf-16le' xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\x00\x3c': + elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]): # UTF-32BE sniffed_xml_encoding = 'utf-32be' xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x00\x00': + elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]): # UTF-32LE sniffed_xml_encoding = 'utf-32le' xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\xfe\xff': + elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): # UTF-32BE with BOM sniffed_xml_encoding = 'utf-32be' xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == 
'\xff\xfe\x00\x00': + elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): # UTF-32LE with BOM sniffed_xml_encoding = 'utf-32le' xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == '\xef\xbb\xbf': + elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): # UTF-8 with BOM sniffed_xml_encoding = 'utf-8' xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') else: # ASCII-compatible pass - xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data) except: xml_encoding_match = None if xml_encoding_match: - xml_encoding = xml_encoding_match.groups()[0].lower() + xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower() if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): xml_encoding = sniffed_xml_encoding acceptable_content_type = 0 @@ -3329,7 +3522,7 @@ def _parseHTTPContentType(content_type): true_encoding = http_encoding or 'us-ascii' elif http_content_type.startswith('text/'): true_encoding = http_encoding or 'us-ascii' - elif http_headers and (not http_headers.has_key('content-type')): + elif http_headers and (not (http_headers.has_key('content-type') or http_headers.has_key('Content-type'))): true_encoding = xml_encoding or 'iso-8859-1' else: true_encoding = xml_encoding or 'utf-8' @@ -3347,35 +3540,35 @@ def _toUTF8(data, encoding): ''' if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') and (data[2:4] != '\x00\x00'): + if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-16be': sys.stderr.write('trying utf-16be instead\n') encoding = 
'utf-16be' data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'): + elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-16le': sys.stderr.write('trying utf-16le instead\n') encoding = 'utf-16le' data = data[2:] - elif data[:3] == '\xef\xbb\xbf': + elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-8': sys.stderr.write('trying utf-8 instead\n') encoding = 'utf-8' data = data[3:] - elif data[:4] == '\x00\x00\xfe\xff': + elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-32be': sys.stderr.write('trying utf-32be instead\n') encoding = 'utf-32be' data = data[4:] - elif data[:4] == '\xff\xfe\x00\x00': + elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-32le': @@ -3398,36 +3591,36 @@ def _stripDoctype(data): rss_version may be 'rss091n' or None stripped_data is the same XML document, minus the DOCTYPE ''' - start = re.search('<\w',data) + start = re.search(_s2bytes('<\w'), data) start = start and start.start() or -1 head,data = data[:start+1], data[start+1:] - entity_pattern = re.compile(r'^\s*<!ENTITY([^>]*?)>', re.MULTILINE) + entity_pattern = re.compile(_s2bytes(r'^\s*<!ENTITY([^>]*?)>'), re.MULTILINE) entity_results=entity_pattern.findall(head) - head = entity_pattern.sub('', head) - doctype_pattern = re.compile(r'^\s*<!DOCTYPE([^>]*?)>', re.MULTILINE) + head = entity_pattern.sub(_s2bytes(''), head) + doctype_pattern = re.compile(_s2bytes(r'^\s*<!DOCTYPE([^>]*?)>'), re.MULTILINE) doctype_results = doctype_pattern.findall(head) - doctype = doctype_results and doctype_results[0] or '' - if doctype.lower().count('netscape'): + doctype = doctype_results and doctype_results[0] or _s2bytes('') + if 
doctype.lower().count(_s2bytes('netscape')): version = 'rss091n' else: version = None # only allow in 'safe' inline entity definitions - replacement='' + replacement=_s2bytes('') if len(doctype_results)==1 and entity_results: - safe_pattern=re.compile('\s+(\w+)\s+"(&#\w+;|[^&"]*)"') + safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')) safe_entities=filter(lambda e: safe_pattern.match(e),entity_results) if safe_entities: - replacement='<!DOCTYPE feed [\n <!ENTITY %s>\n]>' % '>\n <!ENTITY '.join(safe_entities) + replacement=_s2bytes('<!DOCTYPE feed [\n <!ENTITY') + _s2bytes('>\n <!ENTITY ').join(safe_entities) + _s2bytes('>\n]>') data = doctype_pattern.sub(replacement, head) + data - return version, data, dict(replacement and safe_pattern.findall(replacement)) + return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) -def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], extra_headers={}): +def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): '''Parse a feed from a URL, file, stream, or string. - extra_headers, if given, is a dict from http header name to value to add + request_headers, if given, is a dict from http header name to value to add to the request; this overrides internally generated values. 
''' result = FeedParserDict() @@ -3435,23 +3628,31 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer result['entries'] = [] if _XML_AVAILABLE: result['bozo'] = 0 - if type(handlers) == types.InstanceType: + if not isinstance(handlers, list): handlers = [handlers] try: - f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, extra_headers) + f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) data = f.read() - except Exception, e: + except Exception as e: result['bozo'] = 1 result['bozo_exception'] = e data = None f = None + if hasattr(f, 'headers'): + result['headers'] = dict(f.headers) + # overwrite existing headers using response_headers + if 'headers' in result: + result['headers'].update(response_headers) + elif response_headers: + result['headers'] = copy.deepcopy(response_headers) + # if feed is gzip-compressed, decompress it - if f and data and hasattr(f, 'headers'): - if gzip and f.headers.get('content-encoding', '') == 'gzip': + if f and data and 'headers' in result: + if gzip and result['headers'].get('content-encoding') == 'gzip': try: data = gzip.GzipFile(fileobj=_StringIO(data)).read() - except Exception, e: + except Exception as e: # Some feeds claim to be gzipped but they're not, so # we get garbage. 
Ideally, we should re-request the # feed without the 'Accept-encoding: gzip' header, @@ -3459,30 +3660,29 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer result['bozo'] = 1 result['bozo_exception'] = e data = '' - elif zlib and f.headers.get('content-encoding', '') == 'deflate': + elif zlib and result['headers'].get('content-encoding') == 'deflate': try: data = zlib.decompress(data, -zlib.MAX_WBITS) - except Exception, e: + except Exception as e: result['bozo'] = 1 result['bozo_exception'] = e data = '' # save HTTP headers - if hasattr(f, 'info'): - info = f.info() - etag = info.getheader('ETag') - if etag: - result['etag'] = etag - last_modified = info.getheader('Last-Modified') - if last_modified: - result['modified'] = _parse_date(last_modified) + if 'headers' in result: + if 'etag' in result['headers'] or 'ETag' in result['headers']: + etag = result['headers'].get('etag', result['headers'].get('ETag')) + if etag: + result['etag'] = etag + if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: + modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified')) + if modified: + result['modified'] = _parse_date(modified) if hasattr(f, 'url'): result['href'] = f.url result['status'] = 200 if hasattr(f, 'status'): result['status'] = f.status - if hasattr(f, 'headers'): - result['headers'] = f.headers.dict if hasattr(f, 'close'): f.close() @@ -3495,8 +3695,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type = \ _getCharacterEncoding(http_headers, data) if http_headers and (not acceptable_content_type): - if http_headers.has_key('content-type'): - bozo_message = '%s is not an XML media type' % http_headers['content-type'] + if http_headers.has_key('content-type') or http_headers.has_key('Content-type'): + bozo_message = '%s is not an XML media type' % 
http_headers.get('content-type', http_headers.get('Content-type')) else: bozo_message = 'no Content-type specified' result['bozo'] = 1 @@ -3505,8 +3705,12 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer if data is not None: result['version'], data, entities = _stripDoctype(data) - baseuri = http_headers.get('content-location', result.get('href')) - baselang = http_headers.get('content-language', None) + # ensure that baseuri is an absolute uri using an acceptable URI scheme + contentloc = http_headers.get('content-location', http_headers.get('Content-Location', '')) + href = result.get('href', '') + baseuri = _makeSafeAbsoluteURI(href, contentloc) or _makeSafeAbsoluteURI(contentloc) or href + + baselang = http_headers.get('content-language', http_headers.get('Content-Language', None)) # if server sent 304, we're done if result.get('status', 0) == 304: @@ -3582,7 +3786,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer elif proposed_encoding != result['encoding']: result['bozo'] = 1 result['bozo_exception'] = CharacterEncodingOverride( \ - 'documented declared as %s, but parsed as %s' % \ + 'document declared as %s, but parsed as %s' % \ (result['encoding'], proposed_encoding)) result['encoding'] = proposed_encoding @@ -3603,7 +3807,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer saxparser._ns_stack.append({'http://www.w3.org/XML/1998/namespace':'xml'}) try: saxparser.parse(source) - except Exception, e: + except Exception as e: if _debug: import traceback traceback.print_stack() @@ -3613,8 +3817,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer result['bozo_exception'] = feedparser.exc or e use_strict_parser = 0 if not use_strict_parser: - feedparser = _LooseFeedParser(baseuri, baselang, known_encoding and 'utf-8' or '', entities) - feedparser.feed(data) + feedparser = _LooseFeedParser(baseuri, baselang, 'utf-8', entities) 
+ feedparser.feed(data.decode('utf-8', 'replace')) result['feed'] = feedparser.feeddata result['entries'] = feedparser.entries result['version'] = result['version'] or feedparser.version diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 6215132e4b..f2aeb4e4bd 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -14,7 +14,7 @@ from calibre import browser, __appname__, iswindows, \ - strftime, preferred_encoding + strftime, preferred_encoding, as_unicode from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator from calibre import entity_to_unicode @@ -986,8 +986,8 @@ def _download_cover(self): self.cover_path = None try: cu = self.get_cover_url() - except Exception, err: - self.log.error(_('Could not download cover: %s')%str(err)) + except Exception as err: + self.log.error(_('Could not download cover: %s')%as_unicode(err)) self.log.debug(traceback.format_exc()) else: if not cu: @@ -1318,11 +1318,11 @@ def parse_feeds(self): oldest_article=self.oldest_article, max_articles_per_feed=self.max_articles_per_feed, get_article_url=self.get_article_url)) - except Exception, err: + except Exception as err: feed = Feed() msg = 'Failed feed: %s'%(title if title else url) feed.populate_from_preparsed_feed(msg, []) - feed.description = repr(err) + feed.description = as_unicode(err) parsed_feeds.append(feed) self.log.exception(msg) @@ -1468,7 +1468,7 @@ def download(self): 'http://news.calibre-ebook.com/subscribed_files/%s/0/temp.downloaded_recipe' % self.calibre_periodicals_slug ).read() - except Exception, e: + except Exception as e: if hasattr(e, 'getcode') and e.getcode() == 403: raise DownloadDenied( _('You do not have permission to download this issue.' 
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index f2e22c8f5e..64a2c32fb3 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -210,7 +210,7 @@ def fetch_url(self, url): with closing(open_func(url, timeout=self.timeout)) as f: data = response(f.read()+f.read()) data.newurl = f.geturl() - except urllib2.URLError, err: + except urllib2.URLError as err: if hasattr(err, 'code') and responses.has_key(err.code): raise FetchError, responses[err.code] if getattr(err, 'reason', [0])[0] == 104 or \