From 48cbe5fb5f43a1634b47b25b24c338fba5059a28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Thu, 4 Apr 2013 22:39:27 +0200
Subject: [PATCH 01/28] icon for newsweek_polska

---
 recipes/icons/newsweek_polska.png | Bin 0 -> 905 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 recipes/icons/newsweek_polska.png
diff --git a/recipes/icons/newsweek_polska.png b/recipes/icons/newsweek_polska.png
new file mode 100644
index 0000000000000000000000000000000000000000..83e84b89e2e989e12e1d813f7c845ef56e713636
GIT binary patch
literal 905
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61SBU+%rFB|jKx9jP7LeL$-D$|I14-?iy0WW
zg+Z8+Vb&Z8pdfpRr>`sf9d03UZ4Os0iFgJEW@%3s#}ExsMgivce;6OL0hN;gK$epP
z5E^8DzJVaAmv@;2_<^SI)t12p;f5kKAcO??`MX=-Tr>t24L}v#agpqH)@Vv0T<jWv
zazOWB)xd&P6etL^fmvP_Qx>NUK&345a-?a%R1FltV#CivM}J?uf>kwk4M5QkCDo*9
z09x{O(Q>SMus8{#?qg}qzYiZV^&o;3Xevbd+u{{K22l0;)Lg!rGPJ;@eQG^E+2F8l
S-;*yO*Lb@6xvX<aXaWEWxl2U=

literal 0
HcmV?d00001


From 92dbc0e0b59728c9ee1ce11df15e185ebe5e96f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Sun, 7 Apr 2013 14:05:37 +0200
Subject: [PATCH 02/28] fix DRM detection

---
 src/calibre/gui2/store/stores/virtualo_plugin.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/store/stores/virtualo_plugin.py b/src/calibre/gui2/store/stores/virtualo_plugin.py
index 567da2df4e..132f9cef45 100644
--- a/src/calibre/gui2/store/stores/virtualo_plugin.py
+++ b/src/calibre/gui2/store/stores/virtualo_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading
 
 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'
@@ -41,7 +41,7 @@ def search(self, query, max_results=12, timeout=60):
         url = 'http://virtualo.pl/?q=' + urllib.quote(query) + '&f=format_id:4,6,3'
 
         br = browser()
-        no_drm_pattern = re.compile("Znak wodny")
+        no_drm_pattern = re.compile(r'Znak wodny|Brak')
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
@@ -59,7 +59,7 @@ def search(self, query, max_results=12, timeout=60):
                 title = ''.join(data.xpath('.//div[@class="list_title list_text_left"]/a/text()'))
                 author = ', '.join(data.xpath('.//div[@class="list_authors list_text_left"]/a/text()'))
                 formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/img/@src')]
-                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]/div/div/text()')))
+                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]//span[@class="prompt_preview"]/text()')))
 
                 counter -= 1
 
@@ -70,6 +70,6 @@ def search(self, query, max_results=12, timeout=60):
                 s.price = price + ' zł'
                 s.detail_item = 'http://virtualo.pl' + id.strip().split('http://')[0]
                 s.formats = ', '.join(formats).upper()
-                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_UNKNOWN
+                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_LOCKED
 
                 yield s

From 839406af3ae2db4289e9386864c189329bbb44a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Sun, 7 Apr 2013 14:18:32 +0200
Subject: [PATCH 03/28] fix format detection in virtualo

---
 src/calibre/gui2/store/stores/virtualo_plugin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/store/stores/virtualo_plugin.py b/src/calibre/gui2/store/stores/virtualo_plugin.py
index 132f9cef45..86824b5542 100644
--- a/src/calibre/gui2/store/stores/virtualo_plugin.py
+++ b/src/calibre/gui2/store/stores/virtualo_plugin.py
@@ -58,7 +58,7 @@ def search(self, query, max_results=12, timeout=60):
                 cover_url = ''.join(data.xpath('.//div[@class="list_middle_left"]//a//img/@src'))
                 title = ''.join(data.xpath('.//div[@class="list_title list_text_left"]/a/text()'))
                 author = ', '.join(data.xpath('.//div[@class="list_authors list_text_left"]/a/text()'))
-                formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/img/@src')]
+                formats = [ form.split('_')[-1].replace('.png', '') for form in data.xpath('.//div[@style="width:55%;float:left;text-align:left;height:18px;"]//a/span/img/@src')]
                 nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@style="width:45%;float:right;text-align:right;height:18px;"]//span[@class="prompt_preview"]/text()')))
 
                 counter -= 1

From 95a2e354ea0e296b119975aa1d0c7f78a2c327f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Sun, 7 Apr 2013 14:31:21 +0200
Subject: [PATCH 04/28] fix cover fetching in nexto

---
 src/calibre/gui2/store/stores/nexto_plugin.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/store/stores/nexto_plugin.py b/src/calibre/gui2/store/stores/nexto_plugin.py
index df3867ec1a..813a31b4c2 100644
--- a/src/calibre/gui2/store/stores/nexto_plugin.py
+++ b/src/calibre/gui2/store/stores/nexto_plugin.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 2 # Needed for dynamic plugin loading
+store_version = 3 # Needed for dynamic plugin loading
 
 __license__ = 'GPL 3'
 __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'
@@ -67,7 +67,7 @@ def search(self, query, max_results=10, timeout=60):
 
                     cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
                     cover_url = re.sub(r'%2F', '/', cover_url)
-                    cover_url = re.sub(r'\widthMax=120&heightMax=200', 'widthMax=64&heightMax=64', cover_url)
+                    cover_url = re.sub(r'widthMax=120&heightMax=200', 'widthMax=64&heightMax=64', cover_url)
                     title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                     title = re.sub(r' - ebook$', '', title)
                     formats = ', '.join(data.xpath('.//ul[@class="formats_available"]/li//b/text()'))
@@ -82,7 +82,7 @@ def search(self, query, max_results=10, timeout=60):
                     counter -= 1
 
                     s = SearchResult()
-                    s.cover_url = 'http://www.nexto.pl' + cover_url
+                    s.cover_url = cover_url if cover_url[:4] == 'http' else 'http://www.nexto.pl' + cover_url
                     s.title = title.strip()
                     s.author = author.strip()
                     s.price = price

From 8523ad9103ae538031fa089d539067909cd5083e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 7 Apr 2013 21:15:42 +0530
Subject: [PATCH 05/28] TXT Input: When converting a txt file with a Byte Order
 Mark, remove the Byte Order Mark before further processing as it can cause
 the first line of the text to be mis-interpreted.

---
 src/calibre/ebooks/conversion/plugins/txt_input.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/calibre/ebooks/conversion/plugins/txt_input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py
index e916b30c29..50f1409ea6 100644
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@@ -97,6 +97,12 @@ def convert(self, stream, options, file_ext, log,
         if not ienc:
             ienc = 'utf-8'
             log.debug('No input encoding specified and could not auto detect using %s' % ienc)
+        # Remove BOM from start of txt as its presence can confuse markdown. UTF-32 is tested first: the UTF-16 LE BOM is a prefix of the UTF-32 LE BOM
+        import codecs
+        for bom in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE, codecs.BOM_UTF8, codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
+            if txt.startswith(bom):
+                txt = txt[len(bom):]
+                break
         txt = txt.decode(ienc, 'replace')
 
         # Replace entities

From ee72df324b4b49558dead053cd43117862d884ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Sun, 7 Apr 2013 23:14:08 +0200
Subject: [PATCH 06/28] fixes for fronda

---
 recipes/fronda.recipe | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe
index 6ed5d052a3..5ae78ae848 100644
--- a/recipes/fronda.recipe
+++ b/recipes/fronda.recipe
@@ -23,6 +23,7 @@ class Fronda(BasicNewsRecipe):
     extra_css = '''
         h1 {font-size:150%}
         .body {text-align:left;}
+        div#featured-image {font-style:italic; font-size:70%}
     '''
 
     earliest_date = date.today() - timedelta(days=oldest_article)
@@ -55,7 +56,10 @@ def parse_index(self):
         articles = {}
 
         for url, genName in genres:
-            soup = self.index_to_soup('http://www.fronda.pl/c/'+ url)
+            try:
+                soup = self.index_to_soup('http://www.fronda.pl/c/'+ url)
+            except Exception:  # skip a section whose index cannot be fetched, but let KeyboardInterrupt/SystemExit propagate
+                continue
             articles[genName] = []
             for item in soup.findAll('li'):
                 article_h = item.find('h2')
@@ -77,16 +81,15 @@ def parse_index(self):
         ]
 
     remove_tags = [
-        dict(name='div', attrs={'class':['related-articles',
-                'button right',
-                'pagination']}),
+        dict(name='div', attrs={'class':['related-articles','button right','pagination','related-articles content']}),
         dict(name='h3', attrs={'class':'block-header article comments'}),
-        dict(name='ul', attrs={'class':'comment-list'}),
-        dict(name='ul', attrs={'class':'category'}),
-        dict(name='ul', attrs={'class':'tag-list'}),
+        dict(name='ul', attrs={'class':['comment-list','category','tag-list']}),
         dict(name='p', attrs={'id':'comments-disclaimer'}),
         dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
         dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
-        dict(name='div', attrs={'class':'related-articles content'}),
-        dict(name='div', attrs={'id':'comment-form'})
+        dict(name='div', attrs={'id':'comment-form'}),
+        dict(name='span', attrs={'class':'separator'})
         ]
+
+    preprocess_regexps = [
+        (re.compile(r'komentarzy: .*?</h6>', re.IGNORECASE | re.DOTALL | re.M ), lambda match: '</h6>')]

From f024af31b037c6734180c7533645979106896858 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 8 Apr 2013 08:44:37 +0530
Subject: [PATCH 07/28] Update Tom's Hardware

---
 recipes/tomshardware.recipe | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/recipes/tomshardware.recipe b/recipes/tomshardware.recipe
index c75b19189d..e0f21ae2a4 100644
--- a/recipes/tomshardware.recipe
+++ b/recipes/tomshardware.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 tomshardware.com/us
 '''
@@ -16,22 +14,20 @@ class Tomshardware(BasicNewsRecipe):
     publisher           = "Tom's Hardware"
     category            = 'news, IT, hardware, USA'
     no_stylesheets      = True
-    needs_subscription  = True
-    language = 'en'
-
+    needs_subscription  = 'optional'
+    language            = 'en'
     INDEX               = 'http://www.tomshardware.com'
     LOGIN               = INDEX + '/membres/'
     remove_javascript   = True
     use_embedded_content= False
 
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+    
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         br.open(self.INDEX+'/us/')
@@ -50,8 +46,8 @@ def get_browser(self):
                   ]
 
     feeds = [
-              (u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml'          )
-             ,(u'Latest News'    , u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml')
+              (u'Reviews', u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-2.xml')
+             ,(u'News'   , u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-1.xml')
             ]
 
     def print_version(self, url):

From 5e28991c17fe0d15a631261821e19c6e3e30304f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 8 Apr 2013 09:19:21 +0530
Subject: [PATCH 08/28] Polish help: explain ORIGINAL_* handling, note jacket replacement

---
 src/calibre/ebooks/oeb/polish/main.py | 8 ++++----
 src/calibre/gui2/actions/polish.py    | 8 +++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py
index c04686ed6c..7ec34b5174 100644
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@@ -43,8 +43,8 @@
 changes needed for the desired effect.</p>
 
 <p>You should use this tool as the last step in your ebook creation process.</p>
-
-<p>Note that polishing only works on files in the %s formats.</p>
+{0}
+<p>Note that polishing only works on files in the %s formats.</p>\
 ''')%_(' or ').join('<b>%s</b>'%x for x in SUPPORTED),
 
 'subset': _('''\
@@ -69,7 +69,7 @@
 'jacket': _('''\
 <p>Insert a "book jacket" page at the start of the book that contains
 all the book metadata such as title, tags, authors, series, comments,
-etc.</p>'''),
+etc. Any previous book jacket will be replaced.</p>'''),
 
 'remove_jacket': _('''\
 <p>Remove a previous inserted book jacket page.</p>
@@ -85,7 +85,7 @@
 
 def hfix(name, raw):
     if name == 'about':
-        return raw
+        return raw.format('')
     raw = raw.replace('\n\n', '__XX__')
     raw = raw.replace('\n', ' ')
     raw = raw.replace('__XX__', '\n')
diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py
index 127749cc51..5aecbd2d87 100644
--- a/src/calibre/gui2/actions/polish.py
+++ b/src/calibre/gui2/actions/polish.py
@@ -37,7 +37,13 @@ def __init__(self, db, book_id_map, parent=None):
         self.setWindowTitle(title)
 
         self.help_text = {
-            'polish': _('<h3>About Polishing books</h3>%s')%HELP['about'],
+            'polish': _('<h3>About Polishing books</h3>%s')%HELP['about'].format(
+                _('''<p>If you have both EPUB and ORIGINAL_EPUB in your book,
+                  then polishing will run on ORIGINAL_EPUB (the same for other
+                  ORIGINAL_* formats).  So if you
+                  want Polishing to not run on the ORIGINAL_* format, delete the
+                  ORIGINAL_* format before running it.</p>''')
+            ),
 
             'subset':_('<h3>Subsetting fonts</h3>%s')%HELP['subset'],
 

From 2388668aa9c219801824e0c38e99fa675e32c24e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 8 Apr 2013 16:16:58 +0530
Subject: [PATCH 09/28] Allow restoring of the ORIGINAL_XXX format by
 right-clicking it in the book details panel

---
 src/calibre/gui2/actions/delete.py |  7 +++++++
 src/calibre/gui2/book_details.py   | 14 ++++++++++++--
 src/calibre/gui2/init.py           |  2 ++
 src/calibre/library/database2.py   |  8 ++++++++
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/actions/delete.py b/src/calibre/gui2/actions/delete.py
index 7bdcb18644..178d94a477 100644
--- a/src/calibre/gui2/actions/delete.py
+++ b/src/calibre/gui2/actions/delete.py
@@ -180,6 +180,13 @@ def remove_format_by_id(self, book_id, fmt):
                 self.gui.library_view.currentIndex())
         self.gui.tags_view.recount()
 
+    def restore_format(self, book_id, original_fmt):
+        self.gui.current_db.restore_original_format(book_id, original_fmt)
+        self.gui.library_view.model().refresh_ids([book_id])
+        self.gui.library_view.model().current_changed(self.gui.library_view.currentIndex(),
+                self.gui.library_view.currentIndex())
+        self.gui.tags_view.recount()
+
     def delete_selected_formats(self, *args):
         ids = self._get_selected_ids()
         if not ids:
diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py
index 4d00d282d5..45430da6f4 100644
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@@ -405,6 +405,7 @@ class BookInfo(QWebView):
     link_clicked = pyqtSignal(object)
     remove_format = pyqtSignal(int, object)
     save_format = pyqtSignal(int, object)
+    restore_format = pyqtSignal(int, object)
 
     def __init__(self, vertical, parent=None):
         QWebView.__init__(self, parent)
@@ -418,7 +419,7 @@ def __init__(self, vertical, parent=None):
         palette.setBrush(QPalette.Base, Qt.transparent)
         self.page().setPalette(palette)
         self.css = P('templates/book_details.css', data=True).decode('utf-8')
-        for x, icon in [('remove', 'trash.png'), ('save', 'save.png')]:
+        for x, icon in [('remove', 'trash.png'), ('save', 'save.png'), ('restore', 'edit-undo.png')]:
             ac = QAction(QIcon(I(icon)), '', self)
             ac.current_fmt = None
             ac.triggered.connect(getattr(self, '%s_format_triggerred'%x))
@@ -436,6 +437,9 @@ def remove_format_triggerred(self):
     def save_format_triggerred(self):
         self.context_action_triggered('save')
 
+    def restore_format_triggerred(self):
+        self.context_action_triggered('restore')
+
     def link_activated(self, link):
         self._link_clicked = True
         if unicode(link.scheme()) in ('http', 'https'):
@@ -479,7 +483,11 @@ def contextMenuEvent(self, ev):
                 traceback.print_exc()
             else:
                 for a, t in [('remove', _('Delete the %s format')),
-                    ('save', _('Save the %s format to disk'))]:
+                    ('save', _('Save the %s format to disk')),
+                    ('restore', _('Restore the %s format')),
+                ]:
+                    if a == 'restore' and not fmt.upper().startswith('ORIGINAL_'):
+                        continue
                     ac = getattr(self, '%s_format_action'%a)
                     ac.current_fmt = (book_id, fmt)
                     ac.setText(t%parts[2])
@@ -585,6 +593,7 @@ class BookDetails(QWidget): # {{{
     view_specific_format = pyqtSignal(int, object)
     remove_specific_format = pyqtSignal(int, object)
     save_specific_format = pyqtSignal(int, object)
+    restore_specific_format = pyqtSignal(int, object)
     remote_file_dropped = pyqtSignal(object, object)
     files_dropped = pyqtSignal(object, object)
     cover_changed = pyqtSignal(object, object)
@@ -654,6 +663,7 @@ def __init__(self, vertical, parent=None):
         self.book_info.link_clicked.connect(self.handle_click)
         self.book_info.remove_format.connect(self.remove_specific_format)
         self.book_info.save_format.connect(self.save_specific_format)
+        self.book_info.restore_format.connect(self.restore_specific_format)
         self.setCursor(Qt.PointingHandCursor)
 
     def handle_click(self, link):
diff --git a/src/calibre/gui2/init.py b/src/calibre/gui2/init.py
index eff36e865b..2a5b061819 100644
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@@ -272,6 +272,8 @@ def finalize_layout(self):
                 self.iactions['Remove Books'].remove_format_by_id)
         self.book_details.save_specific_format.connect(
                 self.iactions['Save To Disk'].save_library_format_by_ids)
+        self.book_details.restore_specific_format.connect(
+            self.iactions['Remove Books'].restore_format)
         self.book_details.view_device_book.connect(
                 self.iactions['View'].view_device_book)
 
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 376eb52c3c..06a2e5ff71 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1541,6 +1541,14 @@ def original_fmt(self, book_id, fmt):
         opath = self.format_abspath(book_id, nfmt, index_is_id=True)
         return fmt if opath is None else nfmt
 
+    def restore_original_format(self, book_id, original_fmt, notify=True):
+        opath = self.format_abspath(book_id, original_fmt, index_is_id=True)
+        if opath is not None:
+            fmt = original_fmt.partition('_')[2]
+            with lopen(opath, 'rb') as f:
+                self.add_format(book_id, fmt, f, index_is_id=True, notify=False)
+            self.remove_format(book_id, original_fmt, index_is_id=True, notify=notify)
+
     def delete_book(self, id, notify=True, commit=True, permanent=False,
             do_clean=True):
         '''

From 8264280d733b2a233b76dfcab33e44a0ca21b781 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 8 Apr 2013 21:30:30 +0530
Subject: [PATCH 10/28] Add tests for invalid input to copy_*_to() methods

---
 src/calibre/db/tests/reading.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py
index bf691a3b03..979e2e9247 100644
--- a/src/calibre/db/tests/reading.py
+++ b/src/calibre/db/tests/reading.py
@@ -8,6 +8,7 @@
 __docformat__ = 'restructuredtext en'
 
 import datetime
+from io import BytesIO
 
 from calibre.utils.date import utc_tz
 from calibre.db.tests.base import BaseTest
@@ -205,6 +206,9 @@ def test_get_cover(self): # {{{
             else:
                 self.assertEqual(cdata, cache.cover(book_id, as_path=True),
                                  'Reading of null cover as path failed')
+        buf = BytesIO()
+        self.assertFalse(cache.copy_cover_to(99999, buf), 'copy_cover_to() did not return False for non-existent book_id')
+        self.assertFalse(cache.copy_cover_to(3, buf), 'copy_cover_to() did not return False for non-existent cover')
 
     # }}}
 
@@ -305,6 +309,7 @@ def compare_category(category, old, new):
     def test_get_formats(self): # {{{
         'Test reading ebook formats using the format() method'
         from calibre.library.database2 import LibraryDatabase2
+        from calibre.db.cache import NoSuchFormat
         old = LibraryDatabase2(self.library_path)
         ids = old.all_ids()
         lf = {i:set(old.formats(i, index_is_id=True).split(',')) if old.formats(
@@ -332,6 +337,9 @@ def test_get_formats(self): # {{{
                     self.assertEqual(old, f.read(),
                                  'Failed to read format as path')
 
+        buf = BytesIO()
+        self.assertRaises(NoSuchFormat, cache.copy_format_to, 99999, 'X', buf)  # non-existent book (assertRaises forwards extra args to the callable, so no msg here)
+        self.assertRaises(NoSuchFormat, cache.copy_format_to, 1, 'X', buf)  # non-existent format
 
     # }}}
 

From 77b77fe94867bdb3f176048bf717c85c63be9485 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 09:24:51 +0530
Subject: [PATCH 11/28] Fix #1166562 (Updated recipe for The Onion)

---
 recipes/theonion.recipe | 64 ++++++++++++++++++++++++++++++++---------
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/recipes/theonion.recipe b/recipes/theonion.recipe
index d177e0978d..6cb19cee94 100644
--- a/recipes/theonion.recipe
+++ b/recipes/theonion.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
 
 '''
 theonion.com
@@ -10,7 +10,7 @@
 class TheOnion(BasicNewsRecipe):
     title                 = 'The Onion'
     __author__            = 'Darko Miletic'
-    description           = "America's finest news source"
+    description           = "The Onion, America's Finest News Source, is an award-winning publication covering world, national, and * local issues. It is updated daily online and distributed weekly in select American cities."
     oldest_article        = 2
     max_articles_per_feed = 100
     publisher             = 'Onion, Inc.'
@@ -20,7 +20,8 @@ class TheOnion(BasicNewsRecipe):
     use_embedded_content  = False
     encoding              = 'utf-8'
     publication_type      = 'newsportal'
-    masthead_url          = 'http://o.onionstatic.com/img/headers/onion_190.png'
+    needs_subscription    = 'optional'
+    masthead_url          = 'http://www.theonion.com/static/onion/img/logo_1x.png'
     extra_css             = """
                                 body{font-family: Helvetica,Arial,sans-serif}
                                 .section_title{color: gray; text-transform: uppercase}
@@ -36,21 +37,56 @@ class TheOnion(BasicNewsRecipe):
                         , 'publisher': publisher
                         , 'language' : language
                         }
-    keep_only_tags = [dict(name='article', attrs={'class':'full-article'})]
-    remove_tags = [
-        dict(name=['nav', 'aside', 'section', 'meta']),
-        {'attrs':{'class':lambda x: x and ('share-tools' in x or 'ad-zone' in x)}},
-    ]
+
+    keep_only_tags    = [dict(attrs={'class':'full-article'})]
+    remove_attributes = ['lang','rel']
+    remove_tags       = [
+                         dict(name=['object','link','iframe','base','meta'])
+                        ,dict(attrs={'class':lambda x: x and 'share-tools' in x.split()})
+                        ]
+
 
     feeds = [
               (u'Daily'  , u'http://feeds.theonion.com/theonion/daily' )
              ,(u'Sports' , u'http://feeds.theonion.com/theonion/sports' )
             ]
 
-    def preprocess_html(self, soup, *args):
-        for img in soup.findAll('img', attrs={'data-src':True}):
-            if img['data-src']:
-                img['src'] = img['data-src']
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.open('http://www.theonion.com/')
+        if self.username is not None and self.password is not None:
+            br.open('https://ui.ppjol.com/login/onion/u/j_spring_security_check')
+            br.select_form(name='f')
+            br['j_username'] = self.username
+            br['j_password'] = self.password
+            br.submit()
+        return br
+        
+    def get_article_url(self, article):
+        artl = BasicNewsRecipe.get_article_url(self, article)
+        if artl.startswith('http://www.theonion.com/audio/'):
+           artl = None
+        return artl
+
+    def preprocess_html(self, soup):
+        '''Drop inline styles, unwrap <a> tags and copy data-src into src on images; returns the same soup.'''
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                item.replaceWith(item.string)
+            elif limg:
+                # Link wrapping an image: keep the image, turn the anchor into an attribute-less <div>
+                item.name = 'div'
+                item.attrs = []
+                if not limg.has_key('alt'):
+                    limg['alt'] = 'image'
+            else:
+                item.replaceWith(self.tag_to_string(item))
+        for item in soup.findAll('img'):
+            # has_key() is kept on purpose: it tests tag attributes, not tag contents
+            if item.has_key('data-src'):
+                item['src'] = item['data-src']
         return soup
-
-
+        

From 6bea78733e5ef8c00f3f77cb376093bac178ec78 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 11:11:49 +0530
Subject: [PATCH 12/28] Implement metadata to OPF backup

---
 src/calibre/db/__init__.py      |   1 +
 src/calibre/db/backend.py       |  10 ++
 src/calibre/db/backup.py        | 115 +++++++++++++++
 src/calibre/db/cache.py         | 238 +++++++++++++++++++++++++-------
 src/calibre/db/tests/main.py    |   6 +-
 src/calibre/db/tests/writing.py |  63 +++++++++
 src/calibre/utils/monotonic.py  | 104 ++++++++++++++
 7 files changed, 481 insertions(+), 56 deletions(-)
 create mode 100644 src/calibre/db/backup.py
 create mode 100644 src/calibre/utils/monotonic.py

diff --git a/src/calibre/db/__init__.py b/src/calibre/db/__init__.py
index 5d12bdc686..b0916ebf73 100644
--- a/src/calibre/db/__init__.py
+++ b/src/calibre/db/__init__.py
@@ -68,4 +68,5 @@
     libraries/switching/on calibre startup.
     3. From refresh in the legacy interface: Rember to flush the composite
     column template cache.
+    4. Replace the metadatabackup thread with the new implementation when using the new backend.
 '''
diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index 9259fc628b..c2beb25e2e 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -1067,5 +1067,15 @@ def update_path(self, book_id, title, author, path_field, formats_field):
                         break # Fail silently since nothing catastrophic has happened
                 curpath = os.path.join(curpath, newseg)
 
+    def write_backup(self, path, raw):
+        path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
+        with lopen(path, 'wb') as f:
+            f.write(raw)
+
+    def read_backup(self, path):
+        path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
+        with lopen(path, 'rb') as f:
+            return f.read()
+
    # }}}
 
diff --git a/src/calibre/db/backup.py b/src/calibre/db/backup.py
new file mode 100644
index 0000000000..6410a347c6
--- /dev/null
+++ b/src/calibre/db/backup.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import weakref, traceback
+from threading import Thread, Event
+
+from calibre import prints
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
+
+class Abort(Exception):
+    pass
+
+class MetadataBackup(Thread):
+    '''
+    Continuously backup changed metadata into OPF files
+    in the book directory. This class runs in its own
+    thread.
+    '''
+
+    def __init__(self, db, interval=2, scheduling_interval=0.1):
+        Thread.__init__(self)
+        self.daemon = True
+        self._db = weakref.ref(db)
+        self.stop_running = Event()
+        self.interval = interval
+        self.scheduling_interval = scheduling_interval
+
+    @property
+    def db(self):
+        ans = self._db()
+        if ans is None:
+            raise Abort()
+        return ans
+
+    def stop(self):
+        self.stop_running.set()
+
+    def wait(self, interval):
+        if self.stop_running.wait(interval):
+            raise Abort()
+
+    def run(self):
+        while not self.stop_running.is_set():
+            try:
+                self.wait(self.interval)
+                self.do_one()
+            except Abort:
+                break
+
+    def do_one(self):
+        ''' Write the OPF backup for one dirtied book, if any. Raises Abort if a stop is requested during a wait. '''
+        try:
+            book_id = self.db.get_a_dirtied_book()
+            if book_id is None:
+                return
+        except Abort:
+            raise
+        except:
+            # Happens during interpreter shutdown
+            return
+
+        self.wait(0)
+        try:
+            mi, sequence = self.db.get_metadata_for_dump(book_id)
+        except:
+            prints('Failed to get backup metadata for id:', book_id, 'once')
+            traceback.print_exc()
+            self.wait(self.interval)
+            try:
+                mi, sequence = self.db.get_metadata_for_dump(book_id)
+            except:
+                prints('Failed to get backup metadata for id:', book_id, 'again, giving up')
+                traceback.print_exc()
+                return
+
+        if mi is None:
+            self.db.clear_dirtied(book_id, sequence)
+            return  # no metadata to back up, so do not hand None to metadata_to_opf
+
+        # Give the GUI thread a chance to run. Python threads have no priorities, so
+        # this thread would keep the processor until a scheduling event; the wait is one.
+        self.wait(self.scheduling_interval)
+
+        try:
+            raw = metadata_to_opf(mi)
+        except:
+            prints('Failed to convert to opf for id:', book_id)
+            traceback.print_exc()
+            return
+
+        self.wait(self.scheduling_interval)
+
+        try:
+            self.db.write_backup(book_id, raw)
+        except:
+            prints('Failed to write backup metadata for id:', book_id, 'once')
+            self.wait(self.interval)
+            try:
+                self.db.write_backup(book_id, raw)
+            except:
+                prints('Failed to write backup metadata for id:', book_id, 'again, giving up')
+                return
+
+        self.db.clear_dirtied(book_id, sequence)
+
+    def break_cycles(self):
+        # Legacy compatibility
+        pass
+
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 0f648e96dd..630757497b 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -7,7 +7,7 @@
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, traceback
+import os, traceback, random
 from io import BytesIO
 from collections import defaultdict
 from functools import wraps, partial
@@ -15,7 +15,7 @@
 from calibre.constants import iswindows
 from calibre.db import SPOOL_SIZE
 from calibre.db.categories import get_categories
-from calibre.db.locking import create_locks, RecordLock
+from calibre.db.locking import create_locks
 from calibre.db.errors import NoSuchFormat
 from calibre.db.fields import create_field
 from calibre.db.search import Search
@@ -23,9 +23,10 @@
 from calibre.db.write import get_series_values
 from calibre.db.lazy import FormatMetadata, FormatsList
 from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
                                SpooledTemporaryFile)
-from calibre.utils.date import now
+from calibre.utils.date import now as nowf
 from calibre.utils.icu import sort_key
 
 def api(f):
@@ -57,9 +58,10 @@ def __init__(self, backend):
         self.fields = {}
         self.composites = set()
         self.read_lock, self.write_lock = create_locks()
-        self.record_lock = RecordLock(self.read_lock)
         self.format_metadata_cache = defaultdict(dict)
         self.formatter_template_cache = {}
+        self.dirtied_cache = {}
+        self.dirtied_sequence = 0
         self._search_api = Search(self.field_metadata.get_search_terms())
 
         # Implement locking for all simple read/write API methods
@@ -78,17 +80,18 @@ def __init__(self, backend):
 
         self.initialize_dynamic()
 
+    @write_api
     def initialize_dynamic(self):
         # Reconstruct the user categories, putting them into field_metadata
         # Assumption is that someone else will fix them if they change.
         self.field_metadata.remove_dynamic_categories()
-        for user_cat in sorted(self.pref('user_categories', {}).iterkeys(), key=sort_key):
+        for user_cat in sorted(self._pref('user_categories', {}).iterkeys(), key=sort_key):
             cat_name = '@' + user_cat # add the '@' to avoid name collision
             self.field_metadata.add_user_category(label=cat_name, name=user_cat)
 
         # add grouped search term user categories
-        muc = frozenset(self.pref('grouped_search_make_user_categories', []))
-        for cat in sorted(self.pref('grouped_search_terms', {}).iterkeys(), key=sort_key):
+        muc = frozenset(self._pref('grouped_search_make_user_categories', []))
+        for cat in sorted(self._pref('grouped_search_terms', {}).iterkeys(), key=sort_key):
             if cat in muc:
                 # There is a chance that these can be duplicates of an existing
                 # user category. Print the exception and continue.
@@ -102,10 +105,15 @@ def initialize_dynamic(self):
         #     self.field_metadata.add_search_category(label='search', name=_('Searches'))
 
         self.field_metadata.add_grouped_search_terms(
-                                    self.pref('grouped_search_terms', {}))
+                                    self._pref('grouped_search_terms', {}))
 
         self._search_api.change_locations(self.field_metadata.get_search_terms())
 
+        self.dirtied_cache = {x:i for i, (x,) in enumerate(
+            self.backend.conn.execute('SELECT book FROM metadata_dirtied'))}
+        if self.dirtied_cache:
+            self.dirtied_sequence = max(self.dirtied_cache.itervalues())+1
+
     @property
     def field_metadata(self):
         return self.backend.field_metadata
@@ -131,7 +139,7 @@ def _get_metadata(self, book_id, get_user_categories=True): # {{{
         mi.author_link_map = aul
         mi.comments    = self._field_for('comments', book_id)
         mi.publisher   = self._field_for('publisher', book_id)
-        n = now()
+        n = nowf()
         mi.timestamp   = self._field_for('timestamp', book_id, default_value=n)
         mi.pubdate     = self._field_for('pubdate', book_id, default_value=n)
         mi.uuid        = self._field_for('uuid', book_id,
@@ -413,7 +421,7 @@ def cover(self, book_id,
                 ret = i
         return ret
 
-    @api
+    @read_api
     def copy_cover_to(self, book_id, dest, use_hardlink=False):
         '''
         Copy the cover to the file like object ``dest``. Returns False
@@ -422,17 +430,15 @@ def copy_cover_to(self, book_id, dest, use_hardlink=False):
         copied to it iff the path is different from the current path (taking
         case sensitivity into account).
         '''
-        with self.read_lock:
-            try:
-                path = self._field_for('path', book_id).replace('/', os.sep)
-            except:
-                return False
+        try:
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except AttributeError:
+            return False
 
-        with self.record_lock.lock(book_id):
-            return self.backend.copy_cover_to(path, dest,
+        return self.backend.copy_cover_to(path, dest,
                                               use_hardlink=use_hardlink)
 
-    @api
+    @read_api
     def copy_format_to(self, book_id, fmt, dest, use_hardlink=False):
         '''
         Copy the format ``fmt`` to the file like object ``dest``. If the
@@ -441,15 +447,13 @@ def copy_format_to(self, book_id, fmt, dest, use_hardlink=False):
         the path is different from the current path (taking case sensitivity
         into account).
         '''
-        with self.read_lock:
-            try:
-                name = self.fields['formats'].format_fname(book_id, fmt)
-                path = self._field_for('path', book_id).replace('/', os.sep)
-            except:
-                raise NoSuchFormat('Record %d has no %s file'%(book_id, fmt))
+        try:
+            name = self.fields['formats'].format_fname(book_id, fmt)
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except (KeyError, AttributeError):
+            raise NoSuchFormat('Record %d has no %s file'%(book_id, fmt))
 
-        with self.record_lock.lock(book_id):
-            return self.backend.copy_format_to(book_id, fmt, name, path, dest,
+        return self.backend.copy_format_to(book_id, fmt, name, path, dest,
                                                use_hardlink=use_hardlink)
 
     @read_api
@@ -520,16 +524,16 @@ def format(self, book_id, fmt, as_file=False, as_path=False, preserve_filename=F
                                   this means that repeated calls yield the same
                                   temp file (which is re-created each time)
         '''
-        with self.read_lock:
-            ext = ('.'+fmt.lower()) if fmt else ''
-            try:
-                fname = self.fields['formats'].format_fname(book_id, fmt)
-            except:
-                return None
-            fname += ext
-
+        ext = ('.'+fmt.lower()) if fmt else ''
         if as_path:
             if preserve_filename:
+                with self.read_lock:
+                    try:
+                        fname = self.fields['formats'].format_fname(book_id, fmt)
+                    except:
+                        return None
+                    fname += ext
+
                 bd = base_dir()
                 d = os.path.join(bd, 'format_abspath')
                 try:
@@ -537,36 +541,40 @@ def format(self, book_id, fmt, as_file=False, as_path=False, preserve_filename=F
                 except:
                     pass
                 ret = os.path.join(d, fname)
-                with self.record_lock.lock(book_id):
-                    try:
-                        self.copy_format_to(book_id, fmt, ret)
-                    except NoSuchFormat:
-                        return None
+                try:
+                    self.copy_format_to(book_id, fmt, ret)
+                except NoSuchFormat:
+                    return None
             else:
-                with PersistentTemporaryFile(ext) as pt, self.record_lock.lock(book_id):
+                with PersistentTemporaryFile(ext) as pt:
                     try:
                         self.copy_format_to(book_id, fmt, pt)
                     except NoSuchFormat:
                         return None
                     ret = pt.name
         elif as_file:
-            ret = SpooledTemporaryFile(SPOOL_SIZE)
-            with self.record_lock.lock(book_id):
+            with self.read_lock:
                 try:
-                    self.copy_format_to(book_id, fmt, ret)
-                except NoSuchFormat:
+                    fname = self.fields['formats'].format_fname(book_id, fmt)
+                except:
                     return None
+                fname += ext
+
+            ret = SpooledTemporaryFile(SPOOL_SIZE)
+            try:
+                self.copy_format_to(book_id, fmt, ret)
+            except NoSuchFormat:
+                return None
             ret.seek(0)
             # Various bits of code try to use the name as the default
             # title when reading metadata, so set it
             ret.name = fname
         else:
             buf = BytesIO()
-            with self.record_lock.lock(book_id):
-                try:
-                    self.copy_format_to(book_id, fmt, buf)
-                except NoSuchFormat:
-                    return None
+            try:
+                self.copy_format_to(book_id, fmt, buf)
+            except NoSuchFormat:
+                return None
 
             ret = buf.getvalue()
 
@@ -620,6 +628,30 @@ def get_categories(self, sort='name', book_ids=None, icon_map=None):
         return get_categories(self, sort=sort, book_ids=book_ids,
                               icon_map=icon_map)
 
+    @write_api
+    def update_last_modified(self, book_ids, now=None):
+        if now is None:
+            now = nowf()
+        if book_ids:
+            f = self.fields['last_modified']
+            f.writer.set_books({book_id:now for book_id in book_ids}, self.backend)
+
+    @write_api
+    def mark_as_dirty(self, book_ids):
+        ''' Update last_modified for book_ids (a set, list or tuple of ids) and flag them as needing an OPF backup. '''
+        self._update_last_modified(book_ids)
+        already_dirtied = set(self.dirtied_cache).intersection(book_ids)
+        new_dirtied = set(book_ids) - already_dirtied  # set() so that non-set collections work too
+        already_dirtied = {book_id:self.dirtied_sequence+i for i, book_id in enumerate(already_dirtied)}
+        if already_dirtied:
+            self.dirtied_sequence = max(already_dirtied.itervalues()) + 1
+        self.dirtied_cache.update(already_dirtied)
+        if new_dirtied:
+            self.backend.conn.executemany('INSERT OR IGNORE INTO metadata_dirtied (book) VALUES (?)', ((x,) for x in new_dirtied))
+            new_dirtied = {book_id:self.dirtied_sequence+i for i, book_id in enumerate(new_dirtied)}
+            self.dirtied_sequence = max(new_dirtied.itervalues()) + 1
+            self.dirtied_cache.update(new_dirtied)
+
     @write_api
     def set_field(self, name, book_id_to_val_map, allow_case_change=True):
         f = self.fields[name]
@@ -657,7 +689,7 @@ def set_field(self, name, book_id_to_val_map, allow_case_change=True):
         if dirtied and update_path:
             self._update_path(dirtied, mark_as_dirtied=False)
 
-        # TODO: Mark these as dirtied so that the opf is regenerated
+        self._mark_as_dirty(dirtied)
 
         return dirtied
 
@@ -668,9 +700,111 @@ def update_path(self, book_ids, mark_as_dirtied=True):
             author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
             self.backend.update_path(book_id, title, author, self.fields['path'], self.fields['formats'])
             if mark_as_dirtied:
+                self._mark_as_dirty(book_ids)
+
+    @read_api
+    def get_a_dirtied_book(self):
+        if self.dirtied_cache:
+            return random.choice(tuple(self.dirtied_cache.iterkeys()))
+        return None
+
+    @read_api
+    def get_metadata_for_dump(self, book_id):
+        mi = None
+        # get the current sequence number for this book to pass back to the
+        # backup thread. This will avoid double calls in the case where the
+        # thread has not done the work between the put and the get_metadata
+        sequence = self.dirtied_cache.get(book_id, None)
+        if sequence is not None:
+            try:
+                # While a book is being created, the path is empty. Don't bother to
+                # try to write the opf, because it will go to the wrong folder.
+                if self._field_for('path', book_id):
+                    mi = self._get_metadata(book_id)
+                    # Always set cover to cover.jpg. Even if cover doesn't exist,
+                    # no harm done. This way no need to call dirtied when
+                    # cover is set/removed
+                    mi.cover = 'cover.jpg'
+            except:
+                # This almost certainly means that the book has been deleted while
+                # the backup operation sat in the queue.
                 pass
-            # TODO: Mark these books as dirtied so that metadata.opf is
-            # re-created
+        return mi, sequence
+
+    @write_api
+    def clear_dirtied(self, book_id, sequence):
+        '''
+        Clear the dirtied indicator for the books. This is used when fetching
+        metadata, creating an OPF, and writing a file are separated into steps.
+        The last step is clearing the indicator
+        '''
+        dc_sequence = self.dirtied_cache.get(book_id, None)
+        if dc_sequence is None or sequence is None or dc_sequence == sequence:
+            self.backend.conn.execute('DELETE FROM metadata_dirtied WHERE book=?',
+                    (book_id,))
+            self.dirtied_cache.pop(book_id, None)
+
+    @write_api
+    def write_backup(self, book_id, raw):
+        try:
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except:
+            return
+
+        self.backend.write_backup(path, raw)
+
+    @read_api
+    def dirty_queue_length(self):
+        return len(self.dirtied_cache)
+
+    @read_api
+    def read_backup(self, book_id):
+        ''' Return the OPF metadata backup for the book as a bytestring or None
+        if no such backup exists.  '''
+        try:
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except:
+            return
+
+        try:
+            return self.backend.read_backup(path)
+        except EnvironmentError:
+            return None
+
+    @write_api
+    def dump_metadata(self, book_ids=None, remove_from_dirtied=True,
+            callback=None):
+        '''
+        Write metadata for each record to an individual OPF file. If callback
+        is not None, it is called once at the start with the number of book_ids
+        being processed. And once for every book_id, with arguments (book_id,
+        mi, ok).
+        '''
+        if book_ids is None:
+            book_ids = set(self.dirtied_cache)
+
+        if callback is not None:
+            callback(len(book_ids), True, False)
+
+        for book_id in book_ids:
+            if self._field_for('path', book_id) is None:
+                if callback is not None:
+                    callback(book_id, None, False)
+                continue
+            mi, sequence = self._get_metadata_for_dump(book_id)
+            if mi is None:
+                if callback is not None:
+                    callback(book_id, mi, False)
+                continue
+            try:
+                raw = metadata_to_opf(mi)
+                self._write_backup(book_id, raw)
+                if remove_from_dirtied:
+                    self._clear_dirtied(book_id, sequence)
+            except:
+                pass
+            if callback is not None:
+                callback(book_id, mi, True)
 
     # }}}
 
diff --git a/src/calibre/db/tests/main.py b/src/calibre/db/tests/main.py
index c4bb058b7e..7268db3e99 100644
--- a/src/calibre/db/tests/main.py
+++ b/src/calibre/db/tests/main.py
@@ -16,8 +16,6 @@ def find_tests():
     parser = argparse.ArgumentParser()
     parser.add_argument('name', nargs='?', default=None, help='The name of the test to run, for e.g. writing.WritingTest.many_many_basic')
     args = parser.parse_args()
-    if args.name:
-        unittest.TextTestRunner(verbosity=4).run(unittest.defaultTestLoader.loadTestsFromName(args.name))
-    else:
-        unittest.TextTestRunner(verbosity=4).run(find_tests())
+    tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if args.name else find_tests()
+    unittest.TextTestRunner(verbosity=4).run(tests)
 
diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py
index 127bcd3609..c54d21f055 100644
--- a/src/calibre/db/tests/writing.py
+++ b/src/calibre/db/tests/writing.py
@@ -9,6 +9,7 @@
 
 from collections import namedtuple
 from functools import partial
+from io import BytesIO
 
 from calibre.ebooks.metadata import author_to_author_sort
 from calibre.utils.date import UNDEFINED_DATE
@@ -292,3 +293,65 @@ def test_many_many_basic(self): # {{{
 
     # }}}
 
+    def test_dirtied(self): # {{{
+        'Test the setting of the dirtied flag and the last_modified column'
+        cl = self.cloned_library
+        cache = self.init_cache(cl)
+        ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
+        # First empty dirtied
+        cache.dump_metadata()
+        af(cache.dirtied_cache)
+        af(self.init_cache(cl).dirtied_cache)
+
+        prev = cache.field_for('last_modified', 3)
+        import calibre.db.cache as c
+        from datetime import timedelta
+        utime = prev+timedelta(days=1)
+        onowf = c.nowf
+        c.nowf = lambda : utime
+        try:
+            ae(sf('title', {3:'xxx'}), set([3]))
+            self.assertTrue(3 in cache.dirtied_cache)
+            ae(cache.field_for('last_modified', 3), utime)
+            cache.dump_metadata()
+            raw = cache.read_backup(3)
+            from calibre.ebooks.metadata.opf2 import OPF
+            opf = OPF(BytesIO(raw))
+            ae(opf.title, 'xxx')
+        finally:
+            c.nowf = onowf
+    # }}}
+
+    def test_backup(self): # {{{
+        'Test the automatic backup of changed metadata'
+        cl = self.cloned_library
+        cache = self.init_cache(cl)
+        ae, af, sf, ff = self.assertEqual, self.assertFalse, cache.set_field, cache.field_for
+        # First empty dirtied
+        cache.dump_metadata()
+        af(cache.dirtied_cache)
+        from calibre.db.backup import MetadataBackup
+        interval = 0.01
+        mb = MetadataBackup(cache, interval=interval, scheduling_interval=0)
+        mb.start()
+        try:
+            ae(sf('title', {1:'title1', 2:'title2', 3:'title3'}), {1,2,3})
+            ae(sf('authors', {1:'author1 & author2', 2:'author1 & author2', 3:'author1 & author2'}), {1,2,3})
+            count = 6
+            while cache.dirty_queue_length() and count > 0:
+                mb.join(interval)
+                count -= 1
+            af(cache.dirty_queue_length())
+        finally:
+            mb.stop()
+        mb.join(interval)
+        af(mb.is_alive())
+        from calibre.ebooks.metadata.opf2 import OPF
+        for book_id in (1, 2, 3):
+            raw = cache.read_backup(book_id)
+            opf = OPF(BytesIO(raw))
+            ae(opf.title, 'title%d'%book_id)
+            ae(opf.authors, ['author1', 'author2'])
+    # }}}
+
+
diff --git a/src/calibre/utils/monotonic.py b/src/calibre/utils/monotonic.py
new file mode 100644
index 0000000000..2bda006929
--- /dev/null
+++ b/src/calibre/utils/monotonic.py
@@ -0,0 +1,104 @@
+# vim:fileencoding=utf-8
+
+from __future__ import division, absolute_import
+
+try:
+    try:
+        # >=python-3.3, Unix
+        from time import clock_gettime
+        try:
+            # >={kernel}-sources-2.6.28
+            from time import CLOCK_MONOTONIC_RAW as CLOCK_ID
+        except ImportError:
+            from time import CLOCK_MONOTONIC as CLOCK_ID  # NOQA
+
+        monotonic = lambda: clock_gettime(CLOCK_ID)
+
+    except ImportError:
+        # >=python-3.3
+        from time import monotonic  # NOQA
+
+except ImportError:
+    import ctypes
+    import sys
+
+    try:
+        if sys.platform == 'win32':
+            # Windows only
+            GetTickCount64 = ctypes.windll.kernel32.GetTickCount64
+            GetTickCount64.restype = ctypes.c_ulonglong
+
+            def monotonic():  # NOQA
+                return GetTickCount64() / 1000
+
+        elif sys.platform == 'darwin':
+            # Mac OS X
+            from ctypes.util import find_library
+
+            libc_name = find_library('c')
+            if not libc_name:
+                raise OSError
+
+            libc = ctypes.CDLL(libc_name, use_errno=True)
+
+            mach_absolute_time = libc.mach_absolute_time
+            mach_absolute_time.argtypes = ()
+            mach_absolute_time.restype = ctypes.c_uint64
+
+            class mach_timebase_info_data_t(ctypes.Structure):
+                _fields_ = (
+                    ('numer', ctypes.c_uint32),
+                    ('denom', ctypes.c_uint32),
+                )
+            mach_timebase_info_data_p = ctypes.POINTER(mach_timebase_info_data_t)
+
+            _mach_timebase_info = libc.mach_timebase_info
+            _mach_timebase_info.argtypes = (mach_timebase_info_data_p,)
+            _mach_timebase_info.restype = ctypes.c_int
+
+            def mach_timebase_info():
+                timebase = mach_timebase_info_data_t()
+                _mach_timebase_info(ctypes.byref(timebase))
+                return (timebase.numer, timebase.denom)
+
+            timebase = mach_timebase_info()
+            factor = timebase[0] / timebase[1] * 1e-9
+
+            def monotonic():  # NOQA
+                return mach_absolute_time() * factor
+        else:
+            # linux only (no librt on OS X)
+            import os
+
+            # See <bits/time.h>
+            CLOCK_MONOTONIC = 1
+            CLOCK_MONOTONIC_RAW = 4
+
+            class timespec(ctypes.Structure):
+                _fields_ = (
+                    ('tv_sec', ctypes.c_long),
+                    ('tv_nsec', ctypes.c_long)
+                )
+            tspec = timespec()
+
+            librt = ctypes.CDLL('librt.so.1', use_errno=True)
+            clock_gettime = librt.clock_gettime
+            clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
+
+            if clock_gettime(CLOCK_MONOTONIC_RAW, ctypes.pointer(tspec)) == 0:
+                # >={kernel}-sources-2.6.28
+                clock_id = CLOCK_MONOTONIC_RAW
+            elif clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(tspec)) == 0:
+                clock_id = CLOCK_MONOTONIC
+            else:
+                raise OSError
+
+            def monotonic():  # NOQA
+                if clock_gettime(clock_id, ctypes.pointer(tspec)) != 0:  # clock_id is the clock chosen by the probe above
+                    errno_ = ctypes.get_errno()
+                    raise OSError(errno_, os.strerror(errno_))
+                return tspec.tv_sec + tspec.tv_nsec / 1e9
+
+    except:
+        from time import time as monotonic  # NOQA
+        monotonic

From 5a4e046f98f9e12cd8b9812e5958b32c7d5f1890 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 12:28:08 +0530
Subject: [PATCH 13/28] ...

---
 recipes/fronda.recipe | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe
index 5ae78ae848..8372bb4d81 100644
--- a/recipes/fronda.recipe
+++ b/recipes/fronda.recipe
@@ -6,6 +6,7 @@
 fronda.pl
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from datetime import timedelta, date
 

From 47fb1178287ca3b619cae74a6962d8c45f521093 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 12:30:24 +0530
Subject: [PATCH 14/28] Switch to using flake8 to check for errors

---
 session.vim                  |  3 ---
 setup.cfg                    |  4 +++
 setup/check.py               | 49 ++++++------------------------------
 src/calibre/db/tests/main.py |  5 ++++
 src/calibre/linux.py         |  2 +-
 5 files changed, 18 insertions(+), 45 deletions(-)
 create mode 100644 setup.cfg

diff --git a/session.vim b/session.vim
index 5e127428cf..a67c5ed8e6 100644
--- a/session.vim
+++ b/session.vim
@@ -1,6 +1,3 @@
-" Project wide builtins
-let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext"
-
 " Include directories for C++ modules
 let g:syntastic_cpp_include_dirs = [ 
             \'/usr/include/python2.7',
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000..ba2629d20f
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 160
+builtins = _,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext
+ignore = E12,E221,E301,E302,E304,E401,W391
diff --git a/setup/check.py b/setup/check.py
index 0baec38a50..281527e51a 100644
--- a/setup/check.py
+++ b/setup/check.py
@@ -22,40 +22,12 @@ def __init__(self, filename, lineno, msg):
         self.filename, self.lineno, self.msg = filename, lineno, msg
 
     def __str__(self):
-        return '%s:%s: %s'%(self.filename, self.lineno, self.msg)
-
-def check_for_python_errors(code_string, filename):
-    import _ast
-    # First, compile into an AST and handle syntax errors.
-    try:
-        tree = compile(code_string, filename, "exec", _ast.PyCF_ONLY_AST)
-    except (SyntaxError, IndentationError) as value:
-        msg = value.args[0]
-
-        (lineno, offset, text) = value.lineno, value.offset, value.text
-
-        # If there's an encoding problem with the file, the text is None.
-        if text is None:
-            # Avoid using msg, since for the only known case, it contains a
-            # bogus message that claims the encoding the file declared was
-            # unknown.
-            msg = "%s: problem decoding source" % filename
-
-        return [Message(filename, lineno, msg)]
-    else:
-        checker = __import__('pyflakes.checker').checker
-        # Okay, it's syntactically valid.  Now check it.
-        w = checker.Checker(tree, filename)
-        w.messages.sort(lambda a, b: cmp(a.lineno, b.lineno))
-        return [Message(x.filename, x.lineno, x.message%x.message_args) for x in
-                w.messages]
+        return '%s:%s: %s' % (self.filename, self.lineno, self.msg)
 
 class Check(Command):
 
     description = 'Check for errors in the calibre source code'
 
-    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen', 'icu_lower',
-            'icu_upper', 'icu_title', 'ngettext']
     CACHE = '.check-cache.pickle'
 
     def get_files(self, cache):
@@ -65,10 +37,10 @@ def get_files(self, cache):
                 mtime = os.stat(y).st_mtime
                 if cache.get(y, 0) == mtime:
                     continue
-                if (f.endswith('.py') and f not in ('feedparser.py',
-                    'pyparsing.py', 'markdown.py') and
-                    'prs500/driver.py' not in y):
-                        yield y, mtime
+                if (f.endswith('.py') and f not in (
+                        'feedparser.py', 'pyparsing.py', 'markdown.py') and
+                        'prs500/driver.py' not in y):
+                    yield y, mtime
                 if f.endswith('.coffee'):
                     yield y, mtime
 
@@ -79,25 +51,22 @@ def get_files(self, cache):
                 if f.endswith('.recipe') and cache.get(f, 0) != mtime:
                     yield f, mtime
 
-
     def run(self, opts):
         cache = {}
         if os.path.exists(self.CACHE):
             cache = cPickle.load(open(self.CACHE, 'rb'))
-        builtins = list(set_builtins(self.BUILTINS))
         for f, mtime in self.get_files(cache):
             self.info('\tChecking', f)
             errors = False
             ext = os.path.splitext(f)[1]
             if ext in {'.py', '.recipe'}:
-                w = check_for_python_errors(open(f, 'rb').read(), f)
-                if w:
+                p = subprocess.Popen(['flake8', '--ignore=E,W', f])
+                if p.wait() != 0:
                     errors = True
-                    self.report_errors(w)
             else:
                 from calibre.utils.serve_coffee import check_coffeescript
                 try:
-                   check_coffeescript(f)
+                    check_coffeescript(f)
                 except:
                     errors = True
             if errors:
@@ -106,8 +75,6 @@ def run(self, opts):
                                  self.j(self.SRC, '../session.vim'), '-f', f])
                 raise SystemExit(1)
             cache[f] = mtime
-        for x in builtins:
-            delattr(__builtin__, x)
         cPickle.dump(cache, open(self.CACHE, 'wb'), -1)
         wn_path = os.path.expanduser('~/work/servers/src/calibre_servers/main')
         if os.path.exists(wn_path):
diff --git a/src/calibre/db/tests/main.py b/src/calibre/db/tests/main.py
index 7268db3e99..7b9c63af22 100644
--- a/src/calibre/db/tests/main.py
+++ b/src/calibre/db/tests/main.py
@@ -9,6 +9,11 @@
 
 import unittest, os, argparse
 
+try:
+    import init_calibre  # noqa
+except ImportError:
+    pass
+
 def find_tests():
     return unittest.defaultTestLoader.discover(os.path.dirname(os.path.abspath(__file__)), pattern='*.py')
 
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 395831fa8f..d2b0c941a6 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -33,7 +33,7 @@
              'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
              'calibre-smtp = calibre.utils.smtp:main',
         ],
-        'gui_scripts'    : [
+        'gui_scripts' : [
             __appname__+' = calibre.gui2.main:main',
             'lrfviewer    = calibre.gui2.lrf_renderer.main:main',
             'ebook-viewer = calibre.gui2.viewer.main:main',

From a7274a2c8038bd78459f45632b643c82c5fa73a0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 12:47:55 +0530
Subject: [PATCH 15/28] Shortcut for running individual tests

---
 src/calibre/db/tests/main.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/calibre/db/tests/main.py b/src/calibre/db/tests/main.py
index 7b9c63af22..bdc9561ec5 100644
--- a/src/calibre/db/tests/main.py
+++ b/src/calibre/db/tests/main.py
@@ -19,8 +19,22 @@ def find_tests():
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('name', nargs='?', default=None, help='The name of the test to run, for e.g. writing.WritingTest.many_many_basic')
+    parser.add_argument('name', nargs='?', default=None,
+                        help='The name of the test to run, for e.g. writing.WritingTest.many_many_basic or .many_many_basic for a shortcut')
     args = parser.parse_args()
-    tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if args.name else find_tests()
+    if args.name and args.name.startswith('.'):
+        # Shortcut form: '.method_name' picks the first discovered test case
+        # whose method name matches, so module and class need not be typed.
+        # When nothing matches, the whole discovered suite is run instead.
+        wanted = args.name[1:]
+        discovered = find_tests()
+        # Lazy search, so traversal stops at the first match (no exception
+        # needed to break out of the three nested levels of suites).
+        tests = next((case for outer in discovered
+                      for inner in outer._tests
+                      for case in inner
+                      if case._testMethodName == wanted), discovered)
+    else:
+        tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if args.name else find_tests()
     unittest.TextTestRunner(verbosity=4).run(tests)
 

From d9acb3091b6f7bc1e1d49a8ef52a46691ade6c3d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 13:01:21 +0530
Subject: [PATCH 16/28] Fix pep8 compliance and add replace parameter to
 add_formats_with_hooks

---
 setup.cfg                        |  2 +-
 src/calibre/library/database2.py | 90 ++++++++++++++++----------------
 2 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index ba2629d20f..5a0ae0e629 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
 [flake8]
 max-line-length = 160
 builtins = _,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext
-ignore = E12,E221,E301,E302,E304,E401,W391
+ignore = E12,E22,E231,E301,E302,E304,E401,W391
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 06a2e5ff71..8e57647452 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -205,7 +205,7 @@ def get_property(self, idx, index_is_id=False, loc=-1):
             return row[loc]
 
     def initialize_dynamic(self):
-        self.field_metadata = FieldMetadata() #Ensure we start with a clean copy
+        self.field_metadata = FieldMetadata()  # Ensure we start with a clean copy
         self.prefs = DBPrefs(self)
         defs = self.prefs.defaults
         defs['gui_restriction'] = defs['cs_restriction'] = ''
@@ -352,7 +352,6 @@ def migrate_preference(key, default):
             '''.format(_('News')))
         self.conn.commit()
 
-
         CustomColumns.__init__(self)
         template = '''\
                 (SELECT {query} FROM books_{table}_link AS link INNER JOIN
@@ -444,7 +443,7 @@ def migrate_preference(key, default):
         # Assumption is that someone else will fix them if they change.
         self.field_metadata.remove_dynamic_categories()
         for user_cat in sorted(self.prefs.get('user_categories', {}).keys(), key=sort_key):
-            cat_name = '@' + user_cat # add the '@' to avoid name collision
+            cat_name = '@' + user_cat  # add the '@' to avoid name collision
             self.field_metadata.add_user_category(label=cat_name, name=user_cat)
 
         # add grouped search term user categories
@@ -596,7 +595,7 @@ def set_path(self, index, index_is_id=False):
         current title and author. If there was a previous directory, its contents
         are copied and it is deleted.
         '''
-        id = index if  index_is_id else self.id(index)
+        id = index if index_is_id else self.id(index)
         path = self.construct_path_name(id)
         current_path = self.path(id, index_is_id=True).replace(os.sep, '/')
         formats = self.formats(id, index_is_id=True)
@@ -620,7 +619,7 @@ def set_path(self, index, index_is_id=False):
             if not os.path.exists(tpath):
                 os.makedirs(tpath)
 
-            if source_ok: # Migrate existing files
+            if source_ok:  # Migrate existing files
                 self.copy_cover_to(id, os.path.join(tpath, 'cover.jpg'),
                         index_is_id=True, windows_atomic_move=wam,
                         use_hardlink=True)
@@ -668,7 +667,7 @@ def set_path(self, index, index_is_id=False):
                         os.rename(os.path.join(curpath, oldseg),
                                 os.path.join(curpath, newseg))
                     except:
-                        break # Fail silently since nothing catastrophic has happened
+                        break  # Fail silently since nothing catastrophic has happened
                 curpath = os.path.join(curpath, newseg)
 
     def add_listener(self, listener):
@@ -727,7 +726,7 @@ def cover(self, index, index_is_id=False, as_file=False, as_image=False,
             return ret
 
     def cover_last_modified(self, index, index_is_id=False):
-        id = index if  index_is_id else self.id(index)
+        id = index if index_is_id else self.id(index)
         path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
         try:
             return utcfromtimestamp(os.stat(path).st_mtime)
@@ -1074,8 +1073,8 @@ def fuzzy_title(title):
         identical_book_ids = set([])
         if mi.authors:
             try:
-                quathors = mi.authors[:10] # Too many authors causes parsing of
-                                           # the search expression to fail
+                quathors = mi.authors[:10]  # Too many authors causes parsing of
+                                            # the search expression to fail
                 query = u' and '.join([u'author:"=%s"'%(a.replace('"', '')) for a in
                     quathors])
                 qauthors = mi.authors[10:]
@@ -1307,7 +1306,7 @@ def format_abspath(self, index, format, index_is_id=False):
                 return fmt_path
             try:
                 candidates = glob.glob(os.path.join(path, '*'+format))
-            except: # If path contains strange characters this throws an exc
+            except:  # If path contains strange characters this throws an exc
                 candidates = []
             if format and candidates and os.path.exists(candidates[0]):
                 try:
@@ -1350,7 +1349,7 @@ def copy_format_to(self, index, fmt, dest, index_is_id=False,
                         if path != dest:
                             os.rename(path, dest)
                     except:
-                        pass # Nothing too catastrophic happened, the cases mismatch, that's all
+                        pass  # Nothing too catastrophic happened, the cases mismatch, that's all
                 else:
                     windows_atomic_move.copy_path_to(path, dest)
         else:
@@ -1366,7 +1365,7 @@ def copy_format_to(self, index, fmt, dest, index_is_id=False,
                         try:
                             os.rename(path, dest)
                         except:
-                            pass # Nothing too catastrophic happened, the cases mismatch, that's all
+                            pass  # Nothing too catastrophic happened, the cases mismatch, that's all
                 else:
                     if use_hardlink:
                         try:
@@ -1476,12 +1475,12 @@ def format(self, index, format, index_is_id=False, as_file=False,
             return ret
 
     def add_format_with_hooks(self, index, format, fpath, index_is_id=False,
-                              path=None, notify=True):
+                              path=None, notify=True, replace=True):
         npath = self.run_import_plugins(fpath, format)
         format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
         stream = lopen(npath, 'rb')
         format = check_ebook_format(stream, format)
-        retval = self.add_format(index, format, stream,
+        retval = self.add_format(index, format, stream, replace=replace,
                                index_is_id=index_is_id, path=path, notify=notify)
         run_plugins_on_postimport(self, id, format)
         return retval
@@ -1489,7 +1488,8 @@ def add_format_with_hooks(self, index, format, fpath, index_is_id=False,
     def add_format(self, index, format, stream, index_is_id=False, path=None,
             notify=True, replace=True, copy_function=None):
         id = index if index_is_id else self.id(index)
-        if not format: format = ''
+        if not format:
+            format = ''
         self.format_metadata_cache[id].pop(format.upper(), None)
         name = self.format_filename_cache[id].get(format.upper(), None)
         if path is None:
@@ -1576,7 +1576,8 @@ def delete_book(self, id, notify=True, commit=True, permanent=False,
     def remove_format(self, index, format, index_is_id=False, notify=True,
                       commit=True, db_only=False):
         id = index if index_is_id else self.id(index)
-        if not format: format = ''
+        if not format:
+            format = ''
         self.format_metadata_cache[id].pop(format.upper(), None)
         name = self.format_filename_cache[id].get(format.upper(), None)
         if name:
@@ -1745,12 +1746,12 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
             # Get the ids for the item values
             if not cat['is_custom']:
                 funcs = {
-                        'authors'  : self.get_authors_with_ids,
-                        'series'   : self.get_series_with_ids,
+                        'authors': self.get_authors_with_ids,
+                        'series': self.get_series_with_ids,
                         'publisher': self.get_publishers_with_ids,
-                        'tags'     : self.get_tags_with_ids,
+                        'tags': self.get_tags_with_ids,
                         'languages': self.get_languages_with_ids,
-                        'rating'   : self.get_ratings_with_ids,
+                        'rating': self.get_ratings_with_ids,
                     }
                 func = funcs.get(category, None)
                 if func:
@@ -1833,7 +1834,7 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
                             item.rc += 1
                         continue
                     try:
-                        (item_id, sort_val) = tid_cat[val] # let exceptions fly
+                        (item_id, sort_val) = tid_cat[val]  # let exceptions fly
                         item = tcats_cat.get(val, None)
                         if not item:
                             item = tag_class(val, sort_val)
@@ -1855,7 +1856,7 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
                                 tid_cat[val] = (val, val)
                     for val in vals:
                         try:
-                            (item_id, sort_val) = tid_cat[val] # let exceptions fly
+                            (item_id, sort_val) = tid_cat[val]  # let exceptions fly
                             item = tcats_cat.get(val, None)
                             if not item:
                                 item = tag_class(val, sort_val)
@@ -1923,7 +1924,7 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
             # in the main Tag loop. Saves a few %
             if datatype == 'rating':
                 formatter = (lambda x:u'\u2605'*int(x/2))
-                avgr = lambda x : x.n
+                avgr = lambda x: x.n
                 # eliminate the zero ratings line as well as count == 0
                 items = [v for v in tcategories[category].values() if v.c > 0 and v.n != 0]
             elif category == 'authors':
@@ -1940,7 +1941,7 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
 
             # sort the list
             if sort == 'name':
-                kf = lambda x :sort_key(x.s)
+                kf = lambda x:sort_key(x.s)
                 reverse=False
             elif sort == 'popularity':
                 kf = lambda x: x.c
@@ -2005,9 +2006,9 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
 
         if sort == 'popularity':
             categories['formats'].sort(key=lambda x: x.count, reverse=True)
-        else: # no ratings exist to sort on
+        else:  # no ratings exist to sort on
             # No need for ICU here.
-            categories['formats'].sort(key = lambda x:x.name)
+            categories['formats'].sort(key=lambda x:x.name)
 
         # Now do identifiers. This works like formats
         categories['identifiers'] = []
@@ -2034,9 +2035,9 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
 
         if sort == 'popularity':
             categories['identifiers'].sort(key=lambda x: x.count, reverse=True)
-        else: # no ratings exist to sort on
+        else:  # no ratings exist to sort on
             # No need for ICU here.
-            categories['identifiers'].sort(key = lambda x:x.name)
+            categories['identifiers'].sort(key=lambda x:x.name)
 
         #### Now do the user-defined categories. ####
         user_categories = dict.copy(self.clean_user_categories())
@@ -2083,7 +2084,7 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
                     else:
                         items.append(taglist[label][n])
                 # else: do nothing, to not include nodes w zero counts
-            cat_name = '@' + user_cat # add the '@' to avoid name collision
+            cat_name = '@' + user_cat  # add the '@' to avoid name collision
             # Not a problem if we accumulate entries in the icon map
             if icon_map is not None:
                 icon_map[cat_name] = icon_map['user:']
@@ -2331,11 +2332,10 @@ def should_replace_field(attr):
         elif mi_idents:
             identifiers = self.get_identifiers(id, index_is_id=True)
             for key, val in mi_idents.iteritems():
-                if val and val.strip(): # Don't delete an existing identifier
+                if val and val.strip():  # Don't delete an existing identifier
                     identifiers[icu_lower(key)] = val
             self.set_identifiers(id, identifiers, notify=False, commit=False)
 
-
         user_mi = mi.get_all_user_metadata(make_copy=False)
         for key in user_mi.iterkeys():
             if key in self.field_metadata and \
@@ -2455,7 +2455,7 @@ def _set_authors(self, id, authors, allow_case_change=False):
             try:
                 self.conn.execute('''INSERT INTO books_authors_link(book, author)
                                      VALUES (?,?)''', (id, aid))
-            except IntegrityError: # Sometimes books specify the same author twice in their metadata
+            except IntegrityError:  # Sometimes books specify the same author twice in their metadata
                 pass
             if case_change:
                 bks = self.conn.get('''SELECT book FROM books_authors_link
@@ -2614,7 +2614,6 @@ def set_pubdate(self, id, dt, notify=True, commit=True):
         if notify:
             self.notify('metadata', [id])
 
-
     def set_publisher(self, id, publisher, notify=True, commit=True,
                       allow_case_change=False):
         self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,))
@@ -2820,7 +2819,7 @@ def rename_publisher(self, old_id, new_name):
         if new_id is None or old_id == new_id:
             new_id = old_id
             # New name doesn't exist. Simply change the old name
-            self.conn.execute('UPDATE publishers SET name=? WHERE id=?', \
+            self.conn.execute('UPDATE publishers SET name=? WHERE id=?',
                               (new_name, old_id))
         else:
             # Change the link table to point at the new one
@@ -2860,7 +2859,7 @@ def set_link_field_for_author(self, aid, link, commit=True, notify=False):
             self.conn.commit()
 
     def set_sort_field_for_author(self, old_id, new_sort, commit=True, notify=False):
-        self.conn.execute('UPDATE authors SET sort=? WHERE id=?', \
+        self.conn.execute('UPDATE authors SET sort=? WHERE id=?',
                               (new_sort.strip(), old_id))
         if commit:
             self.conn.commit()
@@ -2959,7 +2958,7 @@ def get_tags(self, id):
     @classmethod
     def cleanup_tags(cls, tags):
         tags = [x.strip().replace(',', ';') for x in tags if x.strip()]
-        tags = [x.decode(preferred_encoding, 'replace') \
+        tags = [x.decode(preferred_encoding, 'replace')
                     if isbytestring(x) else x for x in tags]
         tags = [u' '.join(x.split()) for x in tags]
         ans, seen = [], set([])
@@ -3360,10 +3359,9 @@ def add_catalog(self, path, title):
             self.add_format(db_id, format, stream, index_is_id=True)
 
         self.conn.commit()
-        self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
+        self.data.refresh_ids(self, [db_id])  # Needed to update format list and size
         return db_id
 
-
     def add_news(self, path, arg):
         from calibre.ebooks.metadata.meta import get_metadata
 
@@ -3399,7 +3397,7 @@ def add_news(self, path, arg):
         if not hasattr(path, 'read'):
             stream.close()
         self.conn.commit()
-        self.data.refresh_ids(self, [id]) # Needed to update format list and size
+        self.data.refresh_ids(self, [id])  # Needed to update format list and size
         return id
 
     def run_import_plugins(self, path_or_stream, format):
@@ -3463,7 +3461,6 @@ def create_book_entry(self, mi, cover=None, add_duplicates=True,
                 traceback.print_exc()
         return id
 
-
     def add_books(self, paths, formats, metadata, add_duplicates=True,
             return_ids=False):
         '''
@@ -3507,7 +3504,7 @@ def add_books(self, paths, formats, metadata, add_duplicates=True,
             stream.close()
             postimport.append((id, format))
         self.conn.commit()
-        self.data.refresh_ids(self, ids) # Needed to update format list and size
+        self.data.refresh_ids(self, ids)  # Needed to update format list and size
         for book_id, fmt in postimport:
             run_plugins_on_postimport(self, book_id, fmt)
         if duplicates:
@@ -3557,7 +3554,7 @@ def import_book(self, mi, formats, notify=True, import_hooks=True,
         # set_metadata, but probably isn't good enough
         self.dirtied([id], commit=False)
         self.conn.commit()
-        self.data.refresh_ids(self, [id]) # Needed to update format list and size
+        self.data.refresh_ids(self, [id])  # Needed to update format list and size
         if notify:
             self.notify('add', [id])
         return id
@@ -3651,7 +3648,8 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
                 FIELDS.add('%d_index'%x)
         data = []
         for record in self.data:
-            if record is None: continue
+            if record is None:
+                continue
             db_id = record[self.FIELD_MAP['id']]
             if ids is not None and db_id not in ids:
                 continue
@@ -3694,8 +3692,8 @@ def migrate_old(self, db, progress):
         progress.setValue(0)
         progress.setLabelText(header)
         QCoreApplication.processEvents()
-        db.conn.row_factory = lambda cursor, row : tuple(row)
-        db.conn.text_factory = lambda x : unicode(x, 'utf-8', 'replace')
+        db.conn.row_factory = lambda cursor, row: tuple(row)
+        db.conn.text_factory = lambda x: unicode(x, 'utf-8', 'replace')
         books = db.conn.get('SELECT id, title, sort, timestamp, series_index, author_sort, isbn FROM books ORDER BY id ASC')
         progress.setAutoReset(False)
         progress.setRange(0, len(books))
@@ -3771,7 +3769,7 @@ def find_books_in_directory(self, dirpath, single_book_per_directory):
                     continue
 
                 key = os.path.splitext(path)[0]
-                if not books.has_key(key):
+                if key not in books:
                     books[key] = []
                 books[key].append(path)
 

From c6a529613b447b9b11fe6b7d7e99cb7e4beda36e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 13:14:01 +0530
Subject: [PATCH 17/28] Space after usage text block and pep8 compliance

---
 src/calibre/utils/config.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py
index d9c0c2d412..fbd4c47a35 100644
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@@ -88,7 +88,7 @@ def __init__(self,
         if epilog is None:
             epilog = _('Created by ')+colored(__author__, fg='cyan')
         usage += '\n\n'+_('''Whenever you pass arguments to %prog that have spaces in them, '''
-                 '''enclose the arguments in quotation marks.''')
+                 '''enclose the arguments in quotation marks.''')+'\n'
         _OptionParser.__init__(self, usage=usage, version=version, epilog=epilog,
                                formatter=CustomHelpFormatter(),
                                conflict_handler=conflict_handler, **kwds)
@@ -171,7 +171,7 @@ def merge_options(self, lower, upper):
         non default values in lower.
         '''
         for dest in lower.__dict__.keys():
-            if not upper.__dict__.has_key(dest):
+            if dest not in upper.__dict__:
                 continue
             opt = self.option_by_dest(dest)
             if lower.__dict__[dest] != opt.default and \
@@ -319,12 +319,16 @@ def set(self, key, val):
         self.__setitem__(key, val)
 
     def __delitem__(self, key):
-        if dict.has_key(self, key):
+        try:
             dict.__delitem__(self, key)
+        except KeyError:
+            pass  # ignore missing keys
+        else:
             self.commit()
 
     def commit(self):
-        if self.no_commit: return
+        if self.no_commit:
+            return
         if hasattr(self, 'file_path') and self.file_path:
             dpath = os.path.dirname(self.file_path)
             if not os.path.exists(dpath):

From 86d4df245c65c684233f7ea3fd09bcceddcc98ae Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 13:16:33 +0530
Subject: [PATCH 18/28] calibredb add_format: Add an option to not replace
 existing formats. Also pep8 compliance.

---
 src/calibre/library/cli.py | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py
index 2fdec62ff0..b3737d15fa 100644
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@@ -88,7 +88,7 @@ def field_name(f):
     for f in data:
         fmts = [x for x in f['formats'] if x is not None]
         f['formats'] = u'[%s]'%u','.join(fmts)
-    widths = list(map(lambda x : 0, fields))
+    widths = list(map(lambda x: 0, fields))
     for record in data:
         for f in record.keys():
             if hasattr(record[f], 'isoformat'):
@@ -164,7 +164,8 @@ def list_option_parser(db=None):
     parser.add_option('--ascending', default=False, action='store_true',
                       help=_('Sort results in ascending order'))
     parser.add_option('-s', '--search', default=None,
-                      help=_('Filter the results by the search query. For the format of the search query, please see the search related documentation in the User Manual. Default is to do no filtering.'))
+                      help=_('Filter the results by the search query. For the format of the search query,'
+                             ' please see the search related documentation in the User Manual. Default is to do no filtering.'))
     parser.add_option('-w', '--line-width', default=-1, type=int,
                       help=_('The maximum width of a single line in the output. Defaults to detecting screen size.'))
     parser.add_option('--separator', default=' ', help=_('The string used to separate fields. Default is a space.'))
@@ -244,7 +245,8 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle,
                 mi.authors = [_('Unknown')]
             for x in ('title', 'authors', 'isbn', 'tags', 'series'):
                 val = locals()['o'+x]
-                if val: setattr(mi, x, val)
+                if val:
+                    setattr(mi, x, val)
             if oseries:
                 mi.series_index = oseries_index
             if ocover:
@@ -425,18 +427,26 @@ def command_remove(args, dbpath):
 
     return 0
 
-def do_add_format(db, id, fmt, path):
-    db.add_format_with_hooks(id, fmt.upper(), path, index_is_id=True)
-    send_message()
+def do_add_format(db, id, fmt, path, opts):
+    done = db.add_format_with_hooks(id, fmt.upper(), path, index_is_id=True,
+                             replace=opts.replace)
+    if not done and not opts.replace:
+        prints(_('A %s file already exists for book: %d, not replacing')%(fmt.upper(), id))
+    else:
+        send_message()
 
 def add_format_option_parser():
-    return get_parser(_(
+    parser = get_parser(_(
 '''\
 %prog add_format [options] id ebook_file
 
 Add the ebook in ebook_file to the available formats for the logical book identified \
-by id. You can get id by using the list command. If the format already exists, it is replaced.
+by id. You can get id by using the list command. If the format already exists, \
+it is replaced, unless the do not replace option is specified.\
 '''))
+    parser.add_option('--dont-replace', dest='replace', default=True, action='store_false',
+                      help=_('Do not replace the format if it already exists'))
+    return parser
 
 
 def command_add_format(args, dbpath):
@@ -451,7 +461,7 @@ def command_add_format(args, dbpath):
     id, path, fmt = int(args[1]), args[2], os.path.splitext(args[2])[-1]
     if not fmt:
         print _('ebook file must have an extension')
-    do_add_format(get_db(dbpath, opts), id, fmt[1:], path)
+    do_add_format(get_db(dbpath, opts), id, fmt[1:], path, opts)
     return 0
 
 def do_remove_format(db, id, fmt):
@@ -791,7 +801,7 @@ def validate_command_line(parser, args, log):
         if not file_extension in available_catalog_formats():
             print_help(parser, log)
             log.error("No catalog plugin available for extension '%s'.\n" % file_extension +
-                      "Catalog plugins available for %s\n" % ', '.join(available_catalog_formats()) )
+                      "Catalog plugins available for %s\n" % ', '.join(available_catalog_formats()))
             raise SystemExit(1)
 
         return output, file_extension
@@ -1214,7 +1224,8 @@ def command_restore_database(args, dbpath):
         dbpath = dbpath.decode(preferred_encoding)
 
     class Progress(object):
-        def __init__(self): self.total = 1
+        def __init__(self):
+            self.total = 1
 
         def __call__(self, msg, step):
             if msg is None:
@@ -1308,7 +1319,7 @@ def do_list():
         from calibre.utils.terminal import geometry, ColoredStream
 
         separator = ' '
-        widths = list(map(lambda x : 0, fields))
+        widths = list(map(lambda x: 0, fields))
         for i in data:
             for j, field in enumerate(fields):
                 widths[j] = max(widths[j], max(len(field), len(unicode(i[field]))))

From 98ebcf2a05fcf5bf0b9d433106156893d5322a6e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 13:26:07 +0530
Subject: [PATCH 19/28] pep8 compliance for the db writing tests

---
 src/calibre/db/tests/writing.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py
index c54d21f055..c37a173ae4 100644
--- a/src/calibre/db/tests/writing.py
+++ b/src/calibre/db/tests/writing.py
@@ -36,7 +36,7 @@ def create_setter(self, name, setter=None):
             ans = lambda db:partial(getattr(db, setter), commit=True)
         return ans
 
-    def create_test(self, name, vals, getter=None, setter=None ):
+    def create_test(self, name, vals, getter=None, setter=None):
         T = namedtuple('Test', 'name vals getter setter')
         return T(name, vals, self.create_getter(name, getter),
                  self.create_setter(name, setter))
@@ -72,7 +72,7 @@ def run_tests(self, tests):
                             test.name, old_sqlite_res, sqlite_res))
                 del db
 
-    def test_one_one(self): # {{{
+    def test_one_one(self):  # {{{
         'Test setting of values in one-one fields'
         tests = [self.create_test('#yesno', (True, False, 'true', 'false', None))]
         for name, getter, setter in (
@@ -113,7 +113,7 @@ def test_one_one(self): # {{{
         self.run_tests(tests)
     # }}}
 
-    def test_many_one_basic(self): # {{{
+    def test_many_one_basic(self):  # {{{
         'Test the different code paths for writing to a many-one field'
         cl = self.cloned_library
         cache = self.init_cache(cl)
@@ -200,7 +200,7 @@ def test_many_one_basic(self): # {{{
 
     # }}}
 
-    def test_many_many_basic(self): # {{{
+    def test_many_many_basic(self):  # {{{
         'Test the different code paths for writing to a many-many field'
         cl = self.cloned_library
         cache = self.init_cache(cl)
@@ -290,10 +290,9 @@ def test_many_many_basic(self): # {{{
             ae(c.field_for('sort', 1), 'Moose, The')
             ae(c.field_for('sort', 2), 'Cat')
 
-
     # }}}
 
-    def test_dirtied(self): # {{{
+    def test_dirtied(self):  # {{{
         'Test the setting of the dirtied flag and the last_modified column'
         cl = self.cloned_library
         cache = self.init_cache(cl)
@@ -308,7 +307,7 @@ def test_dirtied(self): # {{{
         from datetime import timedelta
         utime = prev+timedelta(days=1)
         onowf = c.nowf
-        c.nowf = lambda : utime
+        c.nowf = lambda: utime
         try:
             ae(sf('title', {3:'xxx'}), set([3]))
             self.assertTrue(3 in cache.dirtied_cache)
@@ -322,7 +321,7 @@ def test_dirtied(self): # {{{
             c.nowf = onowf
     # }}}
 
-    def test_backup(self): # {{{
+    def test_backup(self):  # {{{
         'Test the automatic backup of changed metadata'
         cl = self.cloned_library
         cache = self.init_cache(cl)

From 6bfcd2536a9111bcb34436aa00b378351c97e952 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 13:53:05 +0530
Subject: [PATCH 20/28] pep8 compliance

---
 src/calibre/web/feeds/news.py | 77 +++++++++++++++++------------------
 1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 0e0afe2bc4..e0b31c7fcf 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -219,7 +219,7 @@ class BasicNewsRecipe(Recipe):
     #:    }
     #:
     #: All keys are optional. For a full explanantion of the search criteria, see
-    #: `Beautiful Soup <http://www.crummy.com/software/BeautifulSoup/documentation.html#The basic find method: findAll(name, attrs, recursive, text, limit, **kwargs)>`_
+    #: `Beautiful Soup <http://www.crummy.com/software/BeautifulSoup/bs3/documentation.html#Searching%20the%20Parse%20Tree>`_
     #: A common example::
     #:
     #:   remove_tags = [dict(name='div', attrs={'class':'advert'})]
@@ -556,7 +556,7 @@ def get_article_url(self, article):
                 url = article[key]
                 if url and url.startswith('http://'):
                     return url
-        ans = article.get('link',  None)
+        ans = article.get('link', None)
         if not ans and getattr(article, 'links', None):
             for item in article.links:
                 if item.get('rel', 'alternate') == 'alternate':
@@ -725,7 +725,7 @@ def sort_index_by(self, index, weights):
         `weights`: A dictionary that maps weights to titles. If any titles
         in index are not in weights, they are assumed to have a weight of 0.
         '''
-        weights = defaultdict(lambda : 0, weights)
+        weights = defaultdict(lambda: 0, weights)
         index.sort(cmp=lambda x, y: cmp(weights[x], weights[y]))
         return index
 
@@ -860,8 +860,8 @@ def __init__(self, options, log, progress_reporter):
             if isinstance(self.feeds, basestring):
                 self.feeds = [self.feeds]
 
-        if self.needs_subscription and (\
-                self.username is None or self.password is None or \
+        if self.needs_subscription and (
+                self.username is None or self.password is None or
                 (not self.username and not self.password)):
             if self.needs_subscription != 'optional':
                 raise ValueError(_('The "%s" recipe needs a username and password.')%self.title)
@@ -870,7 +870,7 @@ def __init__(self, options, log, progress_reporter):
         self.image_map, self.image_counter = {}, 1
         self.css_map = {}
 
-        web2disk_cmdline = [ 'web2disk',
+        web2disk_cmdline = ['web2disk',
             '--timeout', str(self.timeout),
             '--max-recursions', str(self.recursions),
             '--delay', str(self.delay),
@@ -913,7 +913,6 @@ def __init__(self, options, log, progress_reporter):
         self.failed_downloads = []
         self.partial_failures = []
 
-
     def _postprocess_html(self, soup, first_fetch, job_info):
         if self.no_stylesheets:
             for link in list(soup.findAll('link', type=re.compile('css')))+list(soup.findAll('style')):
@@ -923,7 +922,8 @@ def _postprocess_html(self, soup, first_fetch, job_info):
             head = soup.find('body')
         if not head:
             head = soup.find(True)
-        style = BeautifulSoup(u'<style type="text/css" title="override_css">%s</style>'%(self.template_css +'\n\n'+(self.extra_css if self.extra_css else ''))).find('style')
+        style = BeautifulSoup(u'<style type="text/css" title="override_css">%s</style>'%(
+            self.template_css +'\n\n'+(self.extra_css if self.extra_css else ''))).find('style')
         head.insert(len(head.contents), style)
         if first_fetch and job_info:
             url, f, a, feed_len = job_info
@@ -969,7 +969,6 @@ def _postprocess_html(self, soup, first_fetch, job_info):
                 self.populate_article_metadata(article, ans, first_fetch)
         return ans
 
-
     def download(self):
         '''
         Download and pre-process all articles from the feeds in this recipe.
@@ -1041,12 +1040,12 @@ def description_limiter(cls, src):
 
     def feed2index(self, f, feeds):
         feed = feeds[f]
-        if feed.image_url is not None: # Download feed image
+        if feed.image_url is not None:  # Download feed image
             imgdir = os.path.join(self.output_dir, 'images')
             if not os.path.isdir(imgdir):
                 os.makedirs(imgdir)
 
-            if self.image_map.has_key(feed.image_url):
+            if feed.image_url in self.image_map:
                 feed.image_url = self.image_map[feed.image_url]
             else:
                 bn = urlparse.urlsplit(feed.image_url).path
@@ -1065,7 +1064,6 @@ def feed2index(self, f, feeds):
             if isinstance(feed.image_url, str):
                 feed.image_url = feed.image_url.decode(sys.getfilesystemencoding(), 'strict')
 
-
         templ = (templates.TouchscreenFeedTemplate if self.touchscreen else
                     templates.FeedTemplate)
         templ = templ(lang=self.lang_for_html)
@@ -1074,7 +1072,6 @@ def feed2index(self, f, feeds):
         return templ.generate(f, feeds, self.description_limiter,
                               extra_css=css).render(doctype='xhtml')
 
-
     def _fetch_article(self, url, dir_, f, a, num_of_feeds):
         br = self.browser
         if self.get_browser.im_func is BasicNewsRecipe.get_browser.im_func:
@@ -1117,7 +1114,7 @@ def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
         with PersistentTemporaryFile('_feeds2disk.html') as pt:
             pt.write(raw)
             url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
-        return self._fetch_article(url, dir,  f, a, num_of_feeds)
+        return self._fetch_article(url, dir, f, a, num_of_feeds)
 
     def remove_duplicate_articles(self, feeds):
         seen_keys = defaultdict(set)
@@ -1223,9 +1220,9 @@ def build_index(self):
                 if not url:
                     continue
                 func, arg = (self.fetch_embedded_article, article) \
-                            if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
+                            if self.use_embedded_content or (self.use_embedded_content is None and feed.has_embedded_content()) \
                             else \
-                            ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
+                            ((self.fetch_obfuscated_article if self.articles_are_obfuscated
                               else self.fetch_article), url)
                 req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
                                       {}, (f, a), self.article_downloaded,
@@ -1235,13 +1232,11 @@ def build_index(self):
                 req.feed_dir = feed_dir
                 self.jobs.append(req)
 
-
         self.jobs_done = 0
         tp = ThreadPool(self.simultaneous_downloads)
         for req in self.jobs:
             tp.putRequest(req, block=True, timeout=0)
 
-
         self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
         while True:
             try:
@@ -1328,7 +1323,6 @@ def _download_masthead(self, mu):
         if os.path.exists(mpath):
             os.remove(mpath)
 
-
     def download_masthead(self, url):
         try:
             self._download_masthead(url)
@@ -1455,7 +1449,6 @@ def create_opf(self, feeds, dir=None):
         self.play_order_counter = 0
         self.play_order_map = {}
 
-
         def feed_index(num, parent):
             f = feeds[num]
             for j, a in enumerate(f):
@@ -1595,7 +1588,6 @@ def parse_feeds(self):
                 parsed_feeds.append(feed)
                 self.log.exception(msg)
 
-
         remove = [f for f in parsed_feeds if len(f) == 0 and
                 self.remove_empty_feeds]
         for f in remove:
@@ -1629,8 +1621,11 @@ def tag_to_string(self, tag, use_alt=True, normalize_whitespace=True):
                 res = self.tag_to_string(item)
                 if res:
                     strings.append(res)
-                elif use_alt and item.has_key('alt'):
-                    strings.append(item['alt'])
+                elif use_alt:
+                    try:
+                        strings.append(item['alt'])
+                    except KeyError:
+                        pass
         ans = u''.join(strings)
         if normalize_whitespace:
             ans = re.sub(r'\s+', ' ', ans)
@@ -1646,24 +1641,26 @@ def soup(cls, raw):
 
     @classmethod
     def adeify_images(cls, soup):
-         '''
-         If your recipe when converted to EPUB has problems with images when
-         viewed in Adobe Digital Editions, call this method from within
-         :meth:`postprocess_html`.
-         '''
-         for item in soup.findAll('img'):
-             for attrib in ['height','width','border','align','style']:
-                 if item.has_key(attrib):
+        '''
+        If your recipe when converted to EPUB has problems with images when
+        viewed in Adobe Digital Editions, call this method from within
+        :meth:`postprocess_html`.
+        '''
+        for item in soup.findAll('img'):
+            for attrib in ['height','width','border','align','style']:
+                try:
                     del item[attrib]
-             oldParent = item.parent
-             myIndex = oldParent.contents.index(item)
-             item.extract()
-             divtag = Tag(soup,'div')
-             brtag  = Tag(soup,'br')
-             oldParent.insert(myIndex,divtag)
-             divtag.append(item)
-             divtag.append(brtag)
-         return soup
+                except KeyError:
+                    pass
+            oldParent = item.parent
+            myIndex = oldParent.contents.index(item)
+            item.extract()
+            divtag = Tag(soup,'div')
+            brtag  = Tag(soup,'br')
+            oldParent.insert(myIndex,divtag)
+            divtag.append(item)
+            divtag.append(brtag)
+        return soup
 
 
 class CustomIndexRecipe(BasicNewsRecipe):

From adb0859108756d521be128bde8620eeab0cc945b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 14:03:02 +0530
Subject: [PATCH 21/28] pep8 compliance

---
 src/calibre/db/backend.py         | 49 +++++++++++-----------
 src/calibre/db/cache.py           | 17 ++++----
 src/calibre/db/categories.py      |  4 +-
 src/calibre/db/fields.py          | 36 ++++++++--------
 src/calibre/db/locking.py         |  4 +-
 src/calibre/db/schema_upgrades.py |  3 +-
 src/calibre/db/search.py          | 69 ++++++++++++++++---------------
 src/calibre/db/tables.py          |  2 +-
 src/calibre/db/view.py            |  6 +--
 src/calibre/db/write.py           |  2 +-
 10 files changed, 98 insertions(+), 94 deletions(-)

diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index c2beb25e2e..0194cfc2ae 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -41,8 +41,7 @@
 '''
 
 
-
-class DynamicFilter(object): # {{{
+class DynamicFilter(object):  # {{{
 
     'No longer used, present for legacy compatibility'
 
@@ -57,7 +56,7 @@ def change(self, ids):
         self.ids = frozenset(ids)
 # }}}
 
-class DBPrefs(dict): # {{{
+class DBPrefs(dict):  # {{{
 
     'Store preferences as key:value pairs in the db'
 
@@ -114,9 +113,10 @@ def get_namespaced(self, namespace, key, default=None):
             return default
 
     def set_namespaced(self, namespace, key, val):
-        if u':' in key: raise KeyError('Colons are not allowed in keys')
-        if u':' in namespace: raise KeyError('Colons are not allowed in'
-                ' the namespace')
+        if u':' in key:
+            raise KeyError('Colons are not allowed in keys')
+        if u':' in namespace:
+            raise KeyError('Colons are not allowed in the namespace')
         key = u'namespaced:%s:%s'%(namespace, key)
         self[key] = val
 
@@ -170,7 +170,8 @@ def pynocase(one, two, encoding='utf-8'):
     return cmp(one.lower(), two.lower())
 
 def _author_to_author_sort(x):
-    if not x: return ''
+    if not x:
+        return ''
     return author_to_author_sort(x.replace('|', ','))
 
 def icu_collator(s1, s2):
@@ -239,9 +240,9 @@ def finalize(ctxt):
 
 # }}}
 
-class Connection(apsw.Connection): # {{{
+class Connection(apsw.Connection):  # {{{
 
-    BUSY_TIMEOUT = 2000 # milliseconds
+    BUSY_TIMEOUT = 2000  # milliseconds
 
     def __init__(self, path):
         apsw.Connection.__init__(self, path)
@@ -257,7 +258,7 @@ def __init__(self, path):
         self.createscalarfunction('title_sort', title_sort, 1)
         self.createscalarfunction('author_to_author_sort',
                 _author_to_author_sort, 1)
-        self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()),
+        self.createscalarfunction('uuid4', lambda: str(uuid.uuid4()),
                 0)
 
         # Dummy functions for dynamically created filters
@@ -380,7 +381,7 @@ def __init__(self, library_path, default_prefs=None, read_only=False):
         self.initialize_custom_columns()
         self.initialize_tables()
 
-    def initialize_prefs(self, default_prefs): # {{{
+    def initialize_prefs(self, default_prefs):  # {{{
         self.prefs = DBPrefs(self)
 
         if default_prefs is not None and not self._exists:
@@ -493,7 +494,7 @@ def migrate_preference(key, default):
             self.prefs.set('user_categories', user_cats)
     # }}}
 
-    def initialize_custom_columns(self): # {{{
+    def initialize_custom_columns(self):  # {{{
         with self.conn:
             # Delete previously marked custom columns
             for record in self.conn.get(
@@ -634,11 +635,11 @@ def adapt_number(x, d):
 
         self.custom_data_adapters = {
                 'float': adapt_number,
-                'int':   adapt_number,
-                'rating':lambda x,d : x if x is None else min(10., max(0., float(x))),
-                'bool':  adapt_bool,
+                'int': adapt_number,
+                'rating':lambda x,d: x if x is None else min(10., max(0., float(x))),
+                'bool': adapt_bool,
                 'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),
-                'datetime' : adapt_datetime,
+                'datetime': adapt_datetime,
                 'text':adapt_text,
                 'series':adapt_text,
                 'enumeration': adapt_enum
@@ -661,7 +662,7 @@ def adapt_number(x, d):
 
     # }}}
 
-    def initialize_tables(self): # {{{
+    def initialize_tables(self):  # {{{
         tables = self.tables = {}
         for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
                 'timestamp', 'pubdate', 'uuid', 'path', 'cover',
@@ -866,8 +867,8 @@ def read_tables(self):
         Read all data from the db into the python in-memory tables
         '''
 
-        with self.conn: # Use a single transaction, to ensure nothing modifies
-                        # the db while we are reading
+        with self.conn:  # Use a single transaction, to ensure nothing modifies
+                         # the db while we are reading
             for table in self.tables.itervalues():
                 try:
                     table.read(self)
@@ -885,7 +886,7 @@ def format_abspath(self, book_id, fmt, fname, path):
             return fmt_path
         try:
             candidates = glob.glob(os.path.join(path, '*'+fmt))
-        except: # If path contains strange characters this throws an exc
+        except:  # If path contains strange characters this throws an exc
             candidates = []
         if fmt and candidates and os.path.exists(candidates[0]):
             shutil.copyfile(candidates[0], fmt_path)
@@ -954,7 +955,7 @@ def copy_format_to(self, book_id, fmt, fname, path, dest,
                         if path != dest:
                             os.rename(path, dest)
                     except:
-                        pass # Nothing too catastrophic happened, the cases mismatch, that's all
+                        pass  # Nothing too catastrophic happened, the cases mismatch, that's all
                 else:
                     windows_atomic_move.copy_path_to(path, dest)
         else:
@@ -970,7 +971,7 @@ def copy_format_to(self, book_id, fmt, fname, path, dest,
                         try:
                             os.rename(path, dest)
                         except:
-                            pass # Nothing too catastrophic happened, the cases mismatch, that's all
+                            pass  # Nothing too catastrophic happened, the cases mismatch, that's all
                 else:
                     if use_hardlink:
                         try:
@@ -1021,7 +1022,7 @@ def update_path(self, book_id, title, author, path_field, formats_field):
             if not os.path.exists(tpath):
                 os.makedirs(tpath)
 
-            if source_ok: # Migrate existing files
+            if source_ok:  # Migrate existing files
                 dest = os.path.join(tpath, 'cover.jpg')
                 self.copy_cover_to(current_path, dest,
                         windows_atomic_move=wam, use_hardlink=True)
@@ -1064,7 +1065,7 @@ def update_path(self, book_id, title, author, path_field, formats_field):
                         os.rename(os.path.join(curpath, oldseg),
                                 os.path.join(curpath, newseg))
                     except:
-                        break # Fail silently since nothing catastrophic has happened
+                        break  # Fail silently since nothing catastrophic has happened
                 curpath = os.path.join(curpath, newseg)
 
     def write_backup(self, path, raw):
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 630757497b..0fa280d997 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -86,7 +86,7 @@ def initialize_dynamic(self):
         # Assumption is that someone else will fix them if they change.
         self.field_metadata.remove_dynamic_categories()
         for user_cat in sorted(self._pref('user_categories', {}).iterkeys(), key=sort_key):
-            cat_name = '@' + user_cat # add the '@' to avoid name collision
+            cat_name = '@' + user_cat  # add the '@' to avoid name collision
             self.field_metadata.add_user_category(label=cat_name, name=user_cat)
 
         # add grouped search term user categories
@@ -118,7 +118,7 @@ def initialize_dynamic(self):
     def field_metadata(self):
         return self.backend.field_metadata
 
-    def _get_metadata(self, book_id, get_user_categories=True): # {{{
+    def _get_metadata(self, book_id, get_user_categories=True):  # {{{
         mi = Metadata(None, template_cache=self.formatter_template_cache)
         author_ids = self._field_ids_for('authors', book_id)
         aut_list = [self._author_data(i) for i in author_ids]
@@ -403,16 +403,19 @@ def cover(self, book_id,
         '''
         if as_file:
             ret = SpooledTemporaryFile(SPOOL_SIZE)
-            if not self.copy_cover_to(book_id, ret): return
+            if not self.copy_cover_to(book_id, ret):
+                return
             ret.seek(0)
         elif as_path:
             pt = PersistentTemporaryFile('_dbcover.jpg')
             with pt:
-                if not self.copy_cover_to(book_id, pt): return
+                if not self.copy_cover_to(book_id, pt):
+                    return
             ret = pt.name
         else:
             buf = BytesIO()
-            if not self.copy_cover_to(book_id, buf): return
+            if not self.copy_cover_to(book_id, buf):
+                return
             ret = buf.getvalue()
             if as_image:
                 from PyQt4.Qt import QImage
@@ -669,7 +672,7 @@ def set_field(self, name, book_id_to_val_map, allow_case_change=True):
                 else:
                     v = sid = None
                 if name.startswith('#') and sid is None:
-                    sid = 1.0 # The value will be set to 1.0 in the db table
+                    sid = 1.0  # The value will be set to 1.0 in the db table
                 bimap[k] = v
                 if sid is not None:
                     simap[k] = sid
@@ -808,7 +811,7 @@ def dump_metadata(self, book_ids=None, remove_from_dirtied=True,
 
     # }}}
 
-class SortKey(object): # {{{
+class SortKey(object):  # {{{
 
     def __init__(self, fields, sort_keys, book_id):
         self.orders = tuple(1 if f[1] else -1 for f in fields)
diff --git a/src/calibre/db/categories.py b/src/calibre/db/categories.py
index f49789e16c..3f7bbb9e61 100644
--- a/src/calibre/db/categories.py
+++ b/src/calibre/db/categories.py
@@ -18,7 +18,7 @@
 from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import saved_searches
 
-CATEGORY_SORTS = ('name', 'popularity', 'rating') # This has to be a tuple not a set
+CATEGORY_SORTS = ('name', 'popularity', 'rating')  # This has to be a tuple not a set
 
 class Tag(object):
 
@@ -218,7 +218,7 @@ def get_categories(dbcache, sort='name', book_ids=None, icon_map=None):
                     else:
                         items.append(taglist[label][n])
                 # else: do nothing, to not include nodes w zero counts
-            cat_name = '@' + user_cat # add the '@' to avoid name collision
+            cat_name = '@' + user_cat  # add the '@' to avoid name collision
             # Not a problem if we accumulate entries in the icon map
             if icon_map is not None:
                 icon_map[cat_name] = icon_map['user:']
diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py
index e0074de7d1..20d0d75ff4 100644
--- a/src/calibre/db/fields.py
+++ b/src/calibre/db/fields.py
@@ -31,7 +31,7 @@ def __init__(self, name, table):
         self.table_type = self.table.table_type
         self._sort_key = (sort_key if dt in ('text', 'series', 'enumeration') else lambda x: x)
         self._default_sort_key = ''
-        if dt in { 'int', 'float', 'rating' }:
+        if dt in {'int', 'float', 'rating'}:
             self._default_sort_key = 0
         elif dt == 'bool':
             self._default_sort_key = None
@@ -138,7 +138,7 @@ def __iter__(self):
         return self.table.book_col_map.iterkeys()
 
     def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {id_ : self._sort_key(self.table.book_col_map.get(id_,
+        return {id_: self._sort_key(self.table.book_col_map.get(id_,
             self._default_sort_key)) for id_ in all_book_ids}
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
@@ -183,7 +183,7 @@ def get_value_with_cache(self, book_id, get_metadata):
         return ans
 
     def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
+        return {id_: sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
                 all_book_ids}
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
@@ -245,7 +245,7 @@ def __iter__(self):
         return iter(())
 
     def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        return {id_ : self.for_book(id_) for id_ in
+        return {id_: self.for_book(id_) for id_ in
                 all_book_ids}
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
@@ -280,12 +280,12 @@ def __iter__(self):
         return self.table.id_map.iterkeys()
 
     def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        ans = {id_ : self.table.book_col_map.get(id_, None)
+        ans = {id_: self.table.book_col_map.get(id_, None)
                 for id_ in all_book_ids}
-        sk_map = {cid : (self._default_sort_key if cid is None else
+        sk_map = {cid: (self._default_sort_key if cid is None else
                 self._sort_key(self.table.id_map[cid]))
                 for cid in ans.itervalues()}
-        return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}
+        return {id_: sk_map[cid] for id_, cid in ans.iteritems()}
 
     def iter_searchable_values(self, get_metadata, candidates, default_value=None):
         cbm = self.table.col_book_map
@@ -327,14 +327,14 @@ def __iter__(self):
         return self.table.id_map.iterkeys()
 
     def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
-        ans = {id_ : self.table.book_col_map.get(id_, ())
+        ans = {id_: self.table.book_col_map.get(id_, ())
                 for id_ in all_book_ids}
         all_cids = set()
         for cids in ans.itervalues():
             all_cids = all_cids.union(set(cids))
-        sk_map = {cid : self._sort_key(self.table.id_map[cid])
+        sk_map = {cid: self._sort_key(self.table.id_map[cid])
                 for cid in all_cids}
-        return {id_ : (tuple(sk_map[cid] for cid in cids) if cids else
+        return {id_: (tuple(sk_map[cid] for cid in cids) if cids else
                         (self._default_sort_key,))
                 for id_, cids in ans.iteritems()}
 
@@ -369,9 +369,9 @@ def for_book(self, book_id, default_value=None):
 
     def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
         'Sort by identifier keys'
-        ans = {id_ : self.table.book_col_map.get(id_, ())
+        ans = {id_: self.table.book_col_map.get(id_, ())
                 for id_ in all_book_ids}
-        return {id_ : (tuple(sorted(cids.iterkeys())) if cids else
+        return {id_: (tuple(sorted(cids.iterkeys())) if cids else
                         (self._default_sort_key,))
                 for id_, cids in ans.iteritems()}
 
@@ -397,9 +397,9 @@ class AuthorsField(ManyToManyField):
 
     def author_data(self, author_id):
         return {
-            'name' : self.table.id_map[author_id],
-            'sort' : self.table.asort_map[author_id],
-            'link' : self.table.alink_map[author_id],
+            'name': self.table.id_map[author_id],
+            'sort': self.table.asort_map[author_id],
+            'link': self.table.alink_map[author_id],
         }
 
     def category_sort_value(self, item_id, book_ids, lang_map):
@@ -505,9 +505,9 @@ def get_news_category(self, tag_class, book_ids=None):
 
 def create_field(name, table):
     cls = {
-            ONE_ONE : OneToOneField,
-            MANY_ONE : ManyToOneField,
-            MANY_MANY : ManyToManyField,
+            ONE_ONE: OneToOneField,
+            MANY_ONE: ManyToOneField,
+            MANY_MANY: ManyToManyField,
         }[table.table_type]
     if name == 'authors':
         cls = AuthorsField
diff --git a/src/calibre/db/locking.py b/src/calibre/db/locking.py
index d08c7b99fe..0791a5ac07 100644
--- a/src/calibre/db/locking.py
+++ b/src/calibre/db/locking.py
@@ -39,7 +39,7 @@ def create_locks():
     l = SHLock()
     return RWLockWrapper(l), RWLockWrapper(l, is_shared=False)
 
-class SHLock(object): # {{{
+class SHLock(object):  # {{{
     '''
     Shareable lock class. Used to implement the Multiple readers-single writer
     paradigm. As best as I can tell, neither writer nor reader starvation
@@ -191,7 +191,7 @@ def _take_waiter(self):
         try:
             return self._free_waiters.pop()
         except IndexError:
-            return Condition(self._lock)#, verbose=True)
+            return Condition(self._lock)
 
     def _return_waiter(self, waiter):
         self._free_waiters.append(waiter)
diff --git a/src/calibre/db/schema_upgrades.py b/src/calibre/db/schema_upgrades.py
index f3ca6f9852..c8eaa748c7 100644
--- a/src/calibre/db/schema_upgrades.py
+++ b/src/calibre/db/schema_upgrades.py
@@ -172,7 +172,6 @@ def upgrade_version_5(self):
         '''
         )
 
-
     def upgrade_version_6(self):
         'Show authors in order'
         self.conn.execute('''
@@ -337,7 +336,7 @@ def create_std_tag_browser_view(table_name, column_name,
                 FROM {tn};
 
                 '''.format(tn=table_name, cn=column_name,
-                           vcn=view_column_name, scn= sort_column_name))
+                           vcn=view_column_name, scn=sort_column_name))
             self.conn.execute(script)
 
         def create_cust_tag_browser_view(table_name, link_table_name):
diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py
index 57039e191d..c7fed18f9d 100644
--- a/src/calibre/db/search.py
+++ b/src/calibre/db/search.py
@@ -64,7 +64,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True):
     else:
         internal_match_ok = False
     for t in value:
-        try:     ### ignore regexp exceptions, required because search-ahead tries before typing is finished
+        try:  # ignore regexp exceptions, required because search-ahead tries before typing is finished
             t = icu_lower(t)
             if (matchkind == EQUALS_MATCH):
                 if internal_match_ok:
@@ -95,20 +95,20 @@ def _match(query, value, matchkind, use_primary_find_in_search=True):
     return False
 # }}}
 
-class DateSearch(object): # {{{
+class DateSearch(object):  # {{{
 
     def __init__(self):
         self.operators = {
-            '='   : (1, self.eq),
-            '!='  : (2, self.ne),
-            '>'   : (1, self.gt),
-            '>='  : (2, self.ge),
-            '<'   : (1, self.lt),
-            '<='  : (2, self.le),
+            '=': (1, self.eq),
+            '!=': (2, self.ne),
+            '>': (1, self.gt),
+            '>=': (2, self.ge),
+            '<': (1, self.lt),
+            '<=': (2, self.le),
         }
-        self.local_today         = { '_today', 'today', icu_lower(_('today')) }
-        self.local_yesterday     = { '_yesterday', 'yesterday', icu_lower(_('yesterday')) }
-        self.local_thismonth     = { '_thismonth', 'thismonth', icu_lower(_('thismonth')) }
+        self.local_today         = {'_today', 'today', icu_lower(_('today'))}
+        self.local_yesterday     = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
+        self.local_thismonth     = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
         self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))
 
     def eq(self, dbdate, query, field_count):
@@ -216,16 +216,16 @@ def __call__(self, query, field_iter):
         return matches
 # }}}
 
-class NumericSearch(object): # {{{
+class NumericSearch(object):  # {{{
 
     def __init__(self):
         self.operators = {
-            '=':( 1, lambda r, q: r == q ),
-            '>':( 1, lambda r, q: r is not None and r > q ),
-            '<':( 1, lambda r, q: r is not None and r < q ),
-            '!=':( 2, lambda r, q: r != q ),
-            '>=':( 2, lambda r, q: r is not None and r >= q ),
-            '<=':( 2, lambda r, q: r is not None and r <= q )
+            '=':(1, lambda r, q: r == q),
+            '>':(1, lambda r, q: r is not None and r > q),
+            '<':(1, lambda r, q: r is not None and r < q),
+            '!=':(2, lambda r, q: r != q),
+            '>=':(2, lambda r, q: r is not None and r >= q),
+            '<=':(2, lambda r, q: r is not None and r <= q)
         }
 
     def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
@@ -267,7 +267,7 @@ def __call__(self, query, field_iter, location, datatype, candidates, is_many=Fa
                 p, relop = self.operators['=']
 
             cast = int
-            if  dt == 'rating':
+            if dt == 'rating':
                 cast = lambda x: 0 if x is None else int(x)
                 adjust = lambda x: x/2
             elif dt in ('float', 'composite'):
@@ -303,7 +303,7 @@ def __call__(self, query, field_iter, location, datatype, candidates, is_many=Fa
 
 # }}}
 
-class BooleanSearch(object): # {{{
+class BooleanSearch(object):  # {{{
 
     def __init__(self):
         self.local_no        = icu_lower(_('no'))
@@ -324,27 +324,27 @@ def __call__(self, query, field_iter, bools_are_tristate):
         for val, book_ids in field_iter():
             val = force_to_bool(val)
             if not bools_are_tristate:
-                if val is None or not val: # item is None or set to false
-                    if query in { self.local_no, self.local_unchecked, 'no', '_no', 'false' }:
+                if val is None or not val:  # item is None or set to false
+                    if query in {self.local_no, self.local_unchecked, 'no', '_no', 'false'}:
                         matches |= book_ids
-                else: # item is explicitly set to true
-                    if query in { self.local_yes, self.local_checked, 'yes', '_yes', 'true' }:
+                else:  # item is explicitly set to true
+                    if query in {self.local_yes, self.local_checked, 'yes', '_yes', 'true'}:
                         matches |= book_ids
             else:
                 if val is None:
-                    if query in { self.local_empty, self.local_blank, 'empty', '_empty', 'false' }:
+                    if query in {self.local_empty, self.local_blank, 'empty', '_empty', 'false'}:
                         matches |= book_ids
-                elif not val: # is not None and false
-                    if query in { self.local_no, self.local_unchecked, 'no', '_no', 'true' }:
+                elif not val:  # is not None and false
+                    if query in {self.local_no, self.local_unchecked, 'no', '_no', 'true'}:
                         matches |= book_ids
-                else: # item is not None and true
-                    if query in { self.local_yes, self.local_checked, 'yes', '_yes', 'true' }:
+                else:  # item is not None and true
+                    if query in {self.local_yes, self.local_checked, 'yes', '_yes', 'true'}:
                         matches |= book_ids
         return matches
 
 # }}}
 
-class KeyPairSearch(object): # {{{
+class KeyPairSearch(object):  # {{{
 
     def __call__(self, query, field_iter, candidates, use_primary_find):
         matches = set()
@@ -547,11 +547,12 @@ def get_matches(self, location, query, candidates=None,
         field_metadata = {}
 
         for x, fm in self.field_metadata.iteritems():
-            if x.startswith('@'): continue
+            if x.startswith('@'):
+                continue
             if fm['search_terms'] and x != 'series_sort':
                 all_locs.add(x)
                 field_metadata[x] = fm
-                if fm['datatype'] in { 'composite', 'text', 'comments', 'series', 'enumeration' }:
+                if fm['datatype'] in {'composite', 'text', 'comments', 'series', 'enumeration'}:
                     text_fields.add(x)
 
         locations = all_locs if location == 'all' else {location}
@@ -687,8 +688,8 @@ def __call__(self, dbcache, query, search_restriction, virtual_fields=None):
             dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
             self.date_search, self.num_search, self.bool_search,
             self.keypair_search,
-            prefs[ 'limit_search_columns' ],
-            prefs[ 'limit_search_columns_to' ], self.all_search_locations,
+            prefs['limit_search_columns'],
+            prefs['limit_search_columns_to'], self.all_search_locations,
             virtual_fields)
 
         try:
diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py
index bbc5e3bdef..fc62fbe951 100644
--- a/src/calibre/db/tables.py
+++ b/src/calibre/db/tables.py
@@ -82,7 +82,7 @@ def read(self, db):
             self.metadata['column'], self.metadata['table'])):
             self.book_col_map[row[0]] = self.unserialize(row[1])
 
-class  PathTable(OneToOneTable):
+class PathTable(OneToOneTable):
 
     def set_path(self, book_id, path, db):
         self.book_col_map[book_id] = path
diff --git a/src/calibre/db/view.py b/src/calibre/db/view.py
index e0f99eede0..633fc6d9f9 100644
--- a/src/calibre/db/view.py
+++ b/src/calibre/db/view.py
@@ -60,10 +60,10 @@ def __init__(self, cache):
             else:
                 try:
                     self._field_getters[idx] = {
-                        'id'      : self._get_id,
-                        'au_map'  : self.get_author_data,
+                        'id': self._get_id,
+                        'au_map': self.get_author_data,
                         'ondevice': self.get_ondevice,
-                        'marked'  : self.get_marked,
+                        'marked': self.get_marked,
                     }[col]
                 except KeyError:
                     self._field_getters[idx] = partial(self.get, col)
diff --git a/src/calibre/db/write.py b/src/calibre/db/write.py
index 29a27e16bf..87e7179661 100644
--- a/src/calibre/db/write.py
+++ b/src/calibre/db/write.py
@@ -417,7 +417,7 @@ def many_many(book_id_val_map, db, field, allow_case_change, *args):
 
 # }}}
 
-def identifiers(book_id_val_map, db, field, *args): # {{{
+def identifiers(book_id_val_map, db, field, *args):  # {{{
     table = field.table
     updates = set()
     for book_id, identifiers in book_id_val_map.iteritems():

From d449d6b0bb0bb136ed3a8f246a36f92522be1ccf Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 14:55:28 +0530
Subject: [PATCH 22/28] ...

---
 src/calibre/gui2/proceed.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py
index 9bdf48e086..d09c247bd0 100644
--- a/src/calibre/gui2/proceed.py
+++ b/src/calibre/gui2/proceed.py
@@ -123,7 +123,8 @@ def do_resize(self):
         self.resize(sz)
 
     def show_question(self):
-        if self.isVisible(): return
+        if self.isVisible():
+            return
         if self.questions:
             question = self.questions[0]
             self.msg_label.setText(question.msg)

From 43331acbe57bf4497a5d08613698bbdee854eddd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 15:00:31 +0530
Subject: [PATCH 23/28] Update San Francisco Chronicle

---
 recipes/san_fran_chronicle.recipe | 46 ++++++-------------------------
 1 file changed, 8 insertions(+), 38 deletions(-)

diff --git a/recipes/san_fran_chronicle.recipe b/recipes/san_fran_chronicle.recipe
index cfdf0b07f0..37f15dbe87 100644
--- a/recipes/san_fran_chronicle.recipe
+++ b/recipes/san_fran_chronicle.recipe
@@ -7,7 +7,6 @@
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
 
 class SanFranciscoChronicle(BasicNewsRecipe):
     title                 = u'San Francisco Chronicle'
@@ -19,16 +18,7 @@ class SanFranciscoChronicle(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
-
-
-
-    remove_tags_before  = {'id':'printheader'}
-
-    remove_tags         = [
-                            dict(name='div',attrs={'id':'printheader'})
-                           ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
-                           ,dict(name='div',attrs={'id':'footer'})
-                          ]
+    auto_cleanup = True
 
     extra_css       = '''
                         h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
@@ -43,33 +33,13 @@ class SanFranciscoChronicle(BasicNewsRecipe):
                      '''
 
     feeds          = [
-                         (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
+                         (u'Bay Area News', u'http://www.sfgate.com/bayarea/feed/Bay-Area-News-429.php'),
+                         (u'City Insider', u'http://www.sfgate.com/default/feed/City-Insider-Blog-573.php'),
+                         (u'Crime Scene', u'http://www.sfgate.com/rss/feed/Crime-Scene-Blog-599.php'),
+                         (u'Education News', u'http://www.sfgate.com/education/feed/Education-News-from-SFGate-430.php'),
+                         (u'National News', u'http://www.sfgate.com/rss/feed/National-News-RSS-Feed-435.php'),
+                         (u'Weird News', u'http://www.sfgate.com/weird/feed/Weird-News-RSS-Feed-433.php'),
+                         (u'World News', u'http://www.sfgate.com/rss/feed/World-News-From-SFGate-432.php'),
                      ]
 
-    def print_version(self,url):
-        url= url +"&type=printable"
-        return url
-
-    def get_article_url(self, article):
-        print str(article['title_detail']['value'])
-        url = article.get('guid',None)
-        url = "http://www.sfgate.com/cgi-bin/article.cgi?f="+url
-        if "Presented By:" in str(article['title_detail']['value']):
-            url = ''
-        return url
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 

From 2463ff774033dc3e2ec1361e84503f25f6f571eb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 15:52:59 +0530
Subject: [PATCH 24/28] Add note about not running file syncing tools and
 calibre at the same time

---
 manual/faq.rst | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/manual/faq.rst b/manual/faq.rst
index ceaa2bc735..a73a380029 100644
--- a/manual/faq.rst
+++ b/manual/faq.rst
@@ -647,12 +647,17 @@ computers. Run |app| on a single computer and access it via the Content Server
 or a Remote Desktop solution.
 
 If you must share the actual library, use a file syncing tool like
-DropBox or rsync or Microsoft SkyDrive instead of a networked drive. Even with
-these tools there is danger of data corruption/loss, so only do this if you are
-willing to live with that risk. In particular, be aware that **Google Drive**
-is incompatible with |app|, if you put your |app| library in Google Drive, you
-*will* suffer data loss. See
-`this thread <http://www.mobileread.com/forums/showthread.php?t=205581>`_ for details.
+DropBox or rsync or Microsoft SkyDrive instead of a networked drive. If you are
+using a file syncing tool, it is **essential** that you make sure that both
+|app| and the file syncing tool do not try to access the |app| library at the
+same time. In other words, **do not** run the file syncing tool and |app| at
+the same time.
+
+Even with these tools there is danger of data corruption/loss, so only do this
+if you are willing to live with that risk. In particular, be aware that
+**Google Drive** is incompatible with |app|. If you put your |app| library in
+Google Drive, **you will suffer data loss**. See `this thread
+<http://www.mobileread.com/forums/showthread.php?t=205581>`_ for details.
 
 Content From The Web
 ---------------------

From f45ca90c271603b945dd2d03ba8ca2d802ca2125 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 16:25:37 +0530
Subject: [PATCH 25/28] Allow adding an action button to the process dialog

---
 src/calibre/gui2/proceed.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/calibre/gui2/proceed.py b/src/calibre/gui2/proceed.py
index d09c247bd0..67efe48b53 100644
--- a/src/calibre/gui2/proceed.py
+++ b/src/calibre/gui2/proceed.py
@@ -18,7 +18,8 @@
 
 Question = namedtuple('Question', 'payload callback cancel_callback '
         'title msg html_log log_viewer_title log_is_file det_msg '
-        'show_copy_button checkbox_msg checkbox_checked')
+        'show_copy_button checkbox_msg checkbox_checked action_callback '
+        'action_label action_icon')
 
 class ProceedQuestion(QDialog):
 
@@ -51,6 +52,8 @@ def __init__(self, parent):
         self.copy_button = self.bb.addButton(_('&Copy to clipboard'),
                 self.bb.ActionRole)
         self.copy_button.clicked.connect(self.copy_to_clipboard)
+        self.action_button = self.bb.addButton('', self.bb.ActionRole)
+        self.action_button.clicked.connect(self.action_clicked)
         self.show_det_msg = _('Show &details')
         self.hide_det_msg = _('Hide &details')
         self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole)
@@ -81,6 +84,12 @@ def copy_to_clipboard(self, *args):
                     unicode(self.det_msg.toPlainText())))
         self.copy_button.setText(_('Copied'))
 
+    def action_clicked(self):
+        if self.questions:
+            q = self.questions[0]
+            self.questions[0] = q._replace(callback=q.action_callback)
+        self.accept()
+
     def accept(self):
         if self.questions:
             payload, callback, cancel_callback = self.questions[0][:3]
@@ -131,6 +140,11 @@ def show_question(self):
             self.setWindowTitle(question.title)
             self.log_button.setVisible(bool(question.html_log))
             self.copy_button.setVisible(bool(question.show_copy_button))
+            self.action_button.setVisible(question.action_callback is not None)
+            if question.action_callback is not None:
+                self.action_button.setText(question.action_label or '')
+                self.action_button.setIcon(
+                    QIcon() if question.action_icon is None else question.action_icon)
             self.det_msg.setPlainText(question.det_msg or '')
             self.det_msg.setVisible(False)
             self.det_msg_toggle.setVisible(bool(question.det_msg))
@@ -146,7 +160,8 @@ def show_question(self):
 
     def __call__(self, callback, payload, html_log, log_viewer_title, title,
             msg, det_msg='', show_copy_button=False, cancel_callback=None,
-            log_is_file=False, checkbox_msg=None, checkbox_checked=False):
+            log_is_file=False, checkbox_msg=None, checkbox_checked=False,
+            action_callback=None, action_label=None, action_icon=None):
         '''
         A non modal popup that notifies the user that a background task has
         been completed. This class guarantees that only a single popup is
@@ -171,11 +186,19 @@ def __call__(self, callback, payload, html_log, log_viewer_title, title,
                              called with both the payload and the state of the
                              checkbox as arguments.
         :param checkbox_checked: If True the checkbox is checked by default.
+        :param action_callback: If not None, an extra button is added, which
+                                when clicked will cause action_callback to be called
+                                instead of callback. action_callback is called in
+                                exactly the same way as callback.
+        :param action_label: The text on the action button
+        :param action_icon: The icon for the action button, must be a QIcon object or None
 
         '''
-        question = Question(payload, callback, cancel_callback, title, msg,
-                html_log, log_viewer_title, log_is_file, det_msg,
-                show_copy_button, checkbox_msg, checkbox_checked)
+        question = Question(
+            payload, callback, cancel_callback, title, msg, html_log,
+            log_viewer_title, log_is_file, det_msg, show_copy_button,
+            checkbox_msg, checkbox_checked, action_callback, action_label,
+            action_icon)
         self.questions.append(question)
         self.show_question()
 

From 9fb122cd4b35cff9d4da2130664270805762cc3a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 16:44:46 +0530
Subject: [PATCH 26/28] cleanup

---
 src/calibre/gui2/actions/edit_metadata.py | 71 ++++++++++-------------
 1 file changed, 32 insertions(+), 39 deletions(-)

diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index 4a78c6663a..485bc5bf90 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -279,7 +279,7 @@ def edit_bulk_metadata(self, checked):
         '''
         Edit metadata of selected books in library in bulk.
         '''
-        rows = [r.row() for r in \
+        rows = [r.row() for r in
                 self.gui.library_view.selectionModel().selectedRows()]
         m = self.gui.library_view.model()
         ids = [m.id(r) for r in rows]
@@ -469,45 +469,39 @@ def merge_metadata(self, dest_id, src_ids):
         if not had_orig_cover and dest_cover:
             db.set_cover(dest_id, dest_cover)
 
-        for key in db.field_metadata: #loop thru all defined fields
-          if db.field_metadata[key]['is_custom']:
-            colnum = db.field_metadata[key]['colnum']
+        for key in db.field_metadata:  # loop thru all defined fields
+            fm = db.field_metadata[key]
+            if not fm['is_custom']:
+                continue
+            dt = fm['datatype']
+            colnum = fm['colnum']
             # Get orig_dest_comments before it gets changed
-            if db.field_metadata[key]['datatype'] == 'comments':
-              orig_dest_value = db.get_custom(dest_id, num=colnum, index_is_id=True)
+            if dt == 'comments':
+                orig_dest_value = db.get_custom(dest_id, num=colnum, index_is_id=True)
+
             for src_id in src_ids:
-              dest_value = db.get_custom(dest_id, num=colnum, index_is_id=True)
-              src_value = db.get_custom(src_id, num=colnum, index_is_id=True)
-              if db.field_metadata[key]['datatype'] == 'comments':
-                if src_value and src_value != orig_dest_value:
-                  if not dest_value:
+                dest_value = db.get_custom(dest_id, num=colnum, index_is_id=True)
+                src_value = db.get_custom(src_id, num=colnum, index_is_id=True)
+                if (dt == 'comments' and src_value and src_value != orig_dest_value):
+                    if not dest_value:
+                        db.set_custom(dest_id, src_value, num=colnum)
+                    else:
+                        dest_value = unicode(dest_value) + u'\n\n' + unicode(src_value)
+                        db.set_custom(dest_id, dest_value, num=colnum)
+                if (dt in {'bool', 'int', 'float', 'rating', 'datetime'} and dest_value is None):
                     db.set_custom(dest_id, src_value, num=colnum)
-                  else:
-                    dest_value = unicode(dest_value) + u'\n\n' + unicode(src_value)
+                if (dt == 'series' and not dest_value and src_value):
+                    src_index = db.get_custom_extra(src_id, num=colnum, index_is_id=True)
+                    db.set_custom(dest_id, src_value, num=colnum, extra=src_index)
+                if (dt == 'enumeration' or (dt == 'text' and not fm['is_multiple']) and not dest_value):
+                    db.set_custom(dest_id, src_value, num=colnum)
+                if (dt == 'text' and fm['is_multiple'] and src_value):
+                    if not dest_value:
+                        dest_value = src_value
+                    else:
+                        dest_value.extend(src_value)
                     db.set_custom(dest_id, dest_value, num=colnum)
-              if db.field_metadata[key]['datatype'] in \
-                ('bool', 'int', 'float', 'rating', 'datetime') \
-                and dest_value is None:
-                db.set_custom(dest_id, src_value, num=colnum)
-              if db.field_metadata[key]['datatype'] == 'series' \
-                and not dest_value:
-                if src_value:
-                  src_index = db.get_custom_extra(src_id, num=colnum, index_is_id=True)
-                  db.set_custom(dest_id, src_value, num=colnum, extra=src_index)
-              if (db.field_metadata[key]['datatype'] == 'enumeration' or
-                        (db.field_metadata[key]['datatype'] == 'text' and
-                         not db.field_metadata[key]['is_multiple'])
-                    and not dest_value):
-                db.set_custom(dest_id, src_value, num=colnum)
-              if db.field_metadata[key]['datatype'] == 'text' \
-                and db.field_metadata[key]['is_multiple']:
-                if src_value:
-                  if not dest_value:
-                    dest_value = src_value
-                  else:
-                    dest_value.extend(src_value)
-                  db.set_custom(dest_id, dest_value, num=colnum)
-        # }}}
+    # }}}
 
     def edit_device_collections(self, view, oncard=None):
         model = view.model()
@@ -515,8 +509,8 @@ def edit_device_collections(self, view, oncard=None):
         d = DeviceCategoryEditor(self.gui, tag_to_match=None, data=result, key=sort_key)
         d.exec_()
         if d.result() == d.Accepted:
-            to_rename = d.to_rename # dict of new text to old ids
-            to_delete = d.to_delete # list of ids
+            to_rename = d.to_rename  # dict of new text to old ids
+            to_delete = d.to_delete  # list of ids
             for old_id, new_name in to_rename.iteritems():
                 model.rename_collection(old_id, new_name=unicode(new_name))
             for item in to_delete:
@@ -585,7 +579,6 @@ def do_one_apply(self):
             self.apply_pd.value += 1
         QTimer.singleShot(50, self.do_one_apply)
 
-
     def apply_mi(self, book_id, mi):
         db = self.gui.current_db
 

From 949b3c04f9ac927456f71e8f9df808010e831b35 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 17:07:24 +0530
Subject: [PATCH 27/28] Amazon metadata download: Ignore Spanish edition
 entries when searching for a book on amazon.com

---
 src/calibre/ebooks/metadata/sources/amazon.py | 118 +++++++++---------
 1 file changed, 60 insertions(+), 58 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index fe39c3cd16..4509608135 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -21,7 +21,7 @@
 from calibre.utils.date import parse_only_date
 from calibre.utils.localization import canonicalize_lang
 
-class Worker(Thread): # Get details {{{
+class Worker(Thread):  # Get details {{{
 
     '''
     Get book details from amazons book page in a separate thread
@@ -43,12 +43,12 @@ def __init__(self, url, result_queue, browser, log, relevance, domain,
 
         months = {
                 'de': {
-            1 : ['jän'],
-            2 : ['februar'],
-            3 : ['märz'],
-            5 : ['mai'],
-            6 : ['juni'],
-            7 : ['juli'],
+            1: ['jän'],
+            2: ['februar'],
+            3: ['märz'],
+            5: ['mai'],
+            6: ['juni'],
+            7: ['juli'],
             10: ['okt'],
             12: ['dez']
             },
@@ -276,7 +276,6 @@ def parse_details(self, raw, root):
             self.log.exception('Error parsing authors for url: %r'%self.url)
             authors = []
 
-
         if not title or not authors or not asin:
             self.log.error('Could not find title/authors/asin for %r'%self.url)
             self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title,
@@ -431,7 +430,6 @@ def _render_comments(self, desc):
         desc = re.sub(r'(?s)<!--.*?-->', '', desc)
         return sanitize_comments_html(desc)
 
-
     def parse_comments(self, root):
         ans = ''
         desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
@@ -528,13 +526,13 @@ class Amazon(Source):
 
     AMAZON_DOMAINS = {
             'com': _('US'),
-            'fr' : _('France'),
-            'de' : _('Germany'),
-            'uk' : _('UK'),
-            'it' : _('Italy'),
-            'jp' : _('Japan'),
-            'es' : _('Spain'),
-            'br' : _('Brazil'),
+            'fr': _('France'),
+            'de': _('Germany'),
+            'uk': _('UK'),
+            'it': _('Italy'),
+            'jp': _('Japan'),
+            'es': _('Spain'),
+            'br': _('Brazil'),
     }
 
     options = (
@@ -592,7 +590,7 @@ def get_domain_and_asin(self, identifiers):
                     return domain, val
         return None, None
 
-    def get_book_url(self, identifiers): # {{{
+    def get_book_url(self, identifiers):  # {{{
         domain, asin = self.get_domain_and_asin(identifiers)
         if domain and asin:
             url = None
@@ -637,8 +635,7 @@ def clean_downloaded_metadata(self, mi):
             mi.tags = list(map(fixcase, mi.tags))
         mi.isbn = check_isbn(mi.isbn)
 
-
-    def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
+    def create_query(self, log, title=None, authors=None, identifiers={},  # {{{
             domain=None):
         if domain is None:
             domain = self.domain
@@ -648,8 +645,8 @@ def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
             domain = idomain
 
         # See the amazon detailed search page to get all options
-        q = {   'search-alias' : 'aps',
-                'unfiltered' : '1',
+        q = {'search-alias': 'aps',
+             'unfiltered': '1',
             }
 
         if domain == 'com':
@@ -704,7 +701,7 @@ def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
 
     # }}}
 
-    def get_cached_cover_url(self, identifiers): # {{{
+    def get_cached_cover_url(self, identifiers):  # {{{
         url = None
         domain, asin = self.get_domain_and_asin(identifiers)
         if asin is None:
@@ -717,14 +714,17 @@ def get_cached_cover_url(self, identifiers): # {{{
         return url
     # }}}
 
-    def parse_results_page(self, root): # {{{
+    def parse_results_page(self, root):  # {{{
         from lxml.html import tostring
 
         matches = []
 
         def title_ok(title):
             title = title.lower()
-            for x in ('bulk pack', '[audiobook]', '[audio cd]'):
+            bad = ['bulk pack', '[audiobook]', '[audio cd]']
+            if self.domain == 'com':
+                bad.append('(spanish edition)')
+            for x in bad:
                 if x in title:
                     return False
             return True
@@ -751,13 +751,12 @@ def title_ok(title):
                         matches.append(a.get('href'))
                     break
 
-
         # Keep only the top 5 matches as the matches are sorted by relevance by
         # Amazon so lower matches are not likely to be very relevant
         return matches[:5]
     # }}}
 
-    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
+    def identify(self, log, result_queue, abort, title=None, authors=None,  # {{{
             identifiers={}, timeout=30):
         '''
         Note this method will retry without identifiers automatically if no
@@ -795,7 +794,6 @@ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
                 log.exception(msg)
             return as_unicode(msg)
 
-
         raw = clean_ascii_chars(xml_to_unicode(raw,
             strip_encoding_pats=True, resolve_entities=True)[0])
 
@@ -825,7 +823,6 @@ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
                     # The error is almost always a not found error
                     found = False
 
-
         if found:
             matches = self.parse_results_page(root)
 
@@ -863,7 +860,7 @@ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
         return None
     # }}}
 
-    def download_cover(self, log, result_queue, abort, # {{{
+    def download_cover(self, log, result_queue, abort,  # {{{
             title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
@@ -900,39 +897,44 @@ def download_cover(self, log, result_queue, abort, # {{{
             log.exception('Failed to download cover from:', cached_url)
     # }}}
 
-if __name__ == '__main__': # tests {{{
+if __name__ == '__main__':  # tests {{{
     # To run these test use: calibre-debug -e
     # src/calibre/ebooks/metadata/sources/amazon.py
     from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
             isbn_test, title_test, authors_test, comments_test, series_test)
-    com_tests = [ # {{{
+    com_tests = [  # {{{
 
-            ( # + in title and uses id="main-image" for cover
+            (  # Has a spanish edition
+             {'title':'11/22/63'},
+             [title_test('11/22/63: A Novel', exact=True), authors_test(['Stephen King']),]
+             ),
+
+            (  # + in title and uses id="main-image" for cover
              {'title':'C++ Concurrency in Action'},
              [title_test('C++ Concurrency in Action: Practical Multithreading',
                          exact=True),
               ]
              ),
 
-            ( # Series
+            (  # Series
                 {'identifiers':{'amazon':'0756407117'}},
                 [title_test(
-                "Throne of the Crescent Moon"
-                , exact=True), series_test('Crescent Moon Kingdoms', 1),
+                "Throne of the Crescent Moon",
+                exact=True), series_test('Crescent Moon Kingdoms', 1),
                 comments_test('Makhslood'),
                 ]
             ),
 
-            ( # Different comments markup, using Book Description section
+            (  # Different comments markup, using Book Description section
                 {'identifiers':{'amazon':'0982514506'}},
                 [title_test(
-                "Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy"
-                , exact=True),
+                "Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy",
+                exact=True),
                 comments_test('Jelena'), comments_test('Leslie'),
                 ]
             ),
 
-            ( # # in title
+            (  # # in title
                 {'title':'Expert C# 2008 Business Objects',
                     'authors':['Lhotka']},
                 [title_test('Expert C# 2008 Business Objects', exact=True),
@@ -948,13 +950,13 @@ def download_cover(self, log, result_queue, abort, # {{{
 
             ),
 
-            ( # Sophisticated comment formatting
+            (  # Sophisticated comment formatting
                 {'identifiers':{'isbn': '9781416580829'}},
                 [title_test('Angels & Demons - Movie Tie-In: A Novel',
                     exact=True), authors_test(['Dan Brown'])]
             ),
 
-            ( # No specific problems
+            (  # No specific problems
                 {'identifiers':{'isbn': '0743273567'}},
                 [title_test('The great gatsby', exact=True),
                     authors_test(['F. Scott Fitzgerald'])]
@@ -967,9 +969,9 @@ def download_cover(self, log, result_queue, abort, # {{{
 
             ),
 
-    ] # }}}
+    ]  # }}}
 
-    de_tests = [ # {{{
+    de_tests = [  # {{{
             (
                 {'identifiers':{'isbn': '3548283519'}},
                 [title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
@@ -977,9 +979,9 @@ def download_cover(self, log, result_queue, abort, # {{{
                  ]
 
             ),
-    ] # }}}
+    ]  # }}}
 
-    it_tests = [ # {{{
+    it_tests = [  # {{{
             (
                 {'identifiers':{'isbn': '8838922195'}},
                 [title_test('La briscola in cinque',
@@ -987,9 +989,9 @@ def download_cover(self, log, result_queue, abort, # {{{
                  ]
 
             ),
-    ] # }}}
+    ]  # }}}
 
-    fr_tests = [ # {{{
+    fr_tests = [  # {{{
             (
                 {'identifiers':{'isbn': '2221116798'}},
                 [title_test('L\'étrange voyage de Monsieur Daldry',
@@ -997,9 +999,9 @@ def download_cover(self, log, result_queue, abort, # {{{
                  ]
 
             ),
-    ] # }}}
+    ]  # }}}
 
-    es_tests = [ # {{{
+    es_tests = [  # {{{
             (
                 {'identifiers':{'isbn': '8483460831'}},
                 [title_test('Tiempos Interesantes',
@@ -1007,28 +1009,28 @@ def download_cover(self, log, result_queue, abort, # {{{
                  ]
 
             ),
-    ] # }}}
+    ]  # }}}
 
-    jp_tests = [ # {{{
-            ( # Adult filtering test
+    jp_tests = [  # {{{
+            (  # Adult filtering test
              {'identifiers':{'isbn':'4799500066'}},
              [title_test(u'Ｂｉｔｃｈ Ｔｒａｐ'),]
             ),
 
-            ( # isbn -> title, authors
-                {'identifiers':{'isbn': '9784101302720' }},
+            (  # isbn -> title, authors
+                {'identifiers':{'isbn': '9784101302720'}},
                 [title_test(u'精霊の守り人',
                     exact=True), authors_test([u'上橋 菜穂子'])
                  ]
             ),
-            ( # title, authors -> isbn (will use Shift_JIS encoding in query.)
+            (  # title, authors -> isbn (will use Shift_JIS encoding in query.)
                 {'title': u'考えない練習',
                  'authors': [u'小池 龍之介']},
                 [isbn_test('9784093881067'), ]
             ),
-    ] # }}}
+    ]  # }}}
 
-    br_tests = [ # {{{
+    br_tests = [  # {{{
             (
                 {'title':'Guerra dos Tronos'},
                 [title_test('A Guerra dos Tronos - As Crônicas de Gelo e Fogo',
@@ -1036,7 +1038,7 @@ def download_cover(self, log, result_queue, abort, # {{{
                  ]
 
             ),
-    ] # }}}
+    ]  # }}}
 
     def do_test(domain, start=0, stop=None):
         tests = globals().get(domain+'_tests')

From 892b706760771db130414c54e672f05872ceaa10 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 9 Apr 2013 17:31:48 +0530
Subject: [PATCH 28/28] ...

---
 src/calibre/db/tests/writing.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py
index c37a173ae4..f4fac34a57 100644
--- a/src/calibre/db/tests/writing.py
+++ b/src/calibre/db/tests/writing.py
@@ -17,6 +17,7 @@
 
 class WritingTest(BaseTest):
 
+    # Utils {{{
     def create_getter(self, name, getter=None):
         if getter is None:
             if name.endswith('_index'):
@@ -71,6 +72,7 @@ def run_tests(self, tests):
                         'Failed setting for %s, sqlite value not the same: %r != %r'%(
                             test.name, old_sqlite_res, sqlite_res))
                 del db
+    # }}}
 
     def test_one_one(self):  # {{{
         'Test setting of values in one-one fields'