diff --git a/recipes/chicago_tribune.recipe b/recipes/chicago_tribune.recipe
index 8fa006b5da..6842dcff2a 100644
--- a/recipes/chicago_tribune.recipe
+++ b/recipes/chicago_tribune.recipe
@@ -8,21 +8,25 @@
class ChicagoTribune(BasicNewsRecipe):
title = 'Chicago Tribune'
- __author__ = 'Kovid Goyal and Sujata Raman'
+ __author__ = 'Kovid Goyal and Sujata Raman, a.peter'
description = 'Politics, local and business news from Chicago'
- language = 'en'
+ language = 'en'
+ version = 2
- use_embedded_content = False
- no_stylesheets = True
- remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ remove_javascript = True
+ recursions = 1
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
]
- remove_tags_after = [ {'class':['photo_article',]} ]
+ remove_tags_after = [{'class':['photo_article',]}]
- remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
- {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
+ match_regexps = [r'page=[0-9]+']
+
+ remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
+ {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
dict(name='font',attrs={'id':["cr-other-headlines"]})]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@@ -37,7 +41,7 @@ class ChicagoTribune(BasicNewsRecipe):
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
- '''
+ '''
feeds = [
('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
@@ -76,8 +80,12 @@ def get_article_url(self, article):
print article.get('feedburner_origlink', article.get('guid', article.get('link')))
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
-
def postprocess_html(self, soup, first_fetch):
+ # Remove the navigation bars. They were kept until now so that the links to
+ # further pages could be followed, but they are no longer needed.
+ for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
+ nav.extract()
+
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
@@ -88,4 +96,3 @@ def postprocess_html(self, soup, first_fetch):
return soup
-
diff --git a/recipes/folhadesaopaulo_sub.recipe b/recipes/folhadesaopaulo_sub.recipe
index 660275330a..31ffb2db66 100644
--- a/recipes/folhadesaopaulo_sub.recipe
+++ b/recipes/folhadesaopaulo_sub.recipe
@@ -1,19 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
class FSP(BasicNewsRecipe):
- title = u'Folha de S\xE3o Paulo - Jornal'
+ title = u'Folha de S\xE3o Paulo'
__author__ = 'fluzao'
description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
language = 'pt'
no_stylesheets = True
- max_articles_per_feed = 30
+ max_articles_per_feed = 40
remove_javascript = True
needs_subscription = True
remove_tags_before = dict(name='b')
- remove_tags_after = dict(name='!--/NOTICIA--')
+ remove_tags = [dict(name='td', attrs={'align':'center'})]
remove_attributes = ['height','width']
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
@@ -26,6 +28,13 @@ class FSP(BasicNewsRecipe):
# this solves the problem with truncated content in Kindle
conversion_options = {'linearize_tables' : True}
+ # Strip the article footer, which holds the links for Proximo Texto, Texto Anterior,
+ # Indice and Comunicar Erros.
+ preprocess_regexps = [(re.compile(r'
Texto Anterior:.*',
+ re.DOTALL|re.IGNORECASE), lambda match: r''),
+ (re.compile(r'
Próximo Texto:.*',
+ re.DOTALL|re.IGNORECASE), lambda match: r'')]
+
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
diff --git a/recipes/leipzer_volkszeitung.recipe b/recipes/leipzer_volkszeitung.recipe
new file mode 100644
index 0000000000..807ec5282e
--- /dev/null
+++ b/recipes/leipzer_volkszeitung.recipe
@@ -0,0 +1,34 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
+
+class SportsIllustratedRecipe(BasicNewsRecipe) :  # Recipe for the Leipziger Volkszeitung RSS feeds. NOTE(review): the class name looks copied from another recipe - confirm whether it can be renamed.
+ __author__ = 'a.peter'
+ __copyright__ = 'a.peter'
+ __license__ = 'GPL v3'
+ language = 'de'  # content language code (German)
+ description = 'Leipziger Volkszeitung Online RSS'
+ version = 1
+ title = u'Leipziger Volkszeitung Online RSS'
+ timefmt = ' [%d.%m.%Y]'  # strftime pattern: day.month.year in square brackets
+
+ no_stylesheets = True
+ remove_javascript = True
+ use_embedded_content = False  # presumably makes calibre fetch the linked article pages instead of using the feed text - TODO confirm
+ publication_type = 'newspaper'
+
+ keep_only_tags = [dict(name='div', attrs={'class':'article'})]  # matches <div class="article">
+ remove_tags = [dict(name='div', attrs={'class':['ARTICLE_MORE', 'clearfloat']})]  # matches <div> elements with class ARTICLE_MORE or clearfloat
+
+ feeds = [(u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),  # (section title, RSS feed URL) pairs
+ (u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
+ (u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
+ (u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
+ (u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
+ (u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
+ (u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
+ (u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
+ (u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml')]
+
+ def get_masthead_url(self):  # Return the fixed URL of the logo image; presumably used by calibre as the masthead - TODO confirm.
+ return 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
diff --git a/src/calibre/db/__init__.py b/src/calibre/db/__init__.py
index 826b7a99fd..cde01b5f01 100644
--- a/src/calibre/db/__init__.py
+++ b/src/calibre/db/__init__.py
@@ -64,4 +64,6 @@
columns/categories/searches info into
self.field_metadata. Finally, implement metadata dirtied
functionality.
+ 2. Catch DatabaseException and sqlite.Error when creating new
+ libraries, when switching libraries and on calibre startup.
'''
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index eabeba7b4f..cbc8b41529 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -315,7 +315,7 @@ def punctuation_unwrap(self, length, content, format):
supports a range of html markup and text files
'''
# define the pieces of the regex
-
+
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?]*>\s*
\s*', '
'+ @@ -102,14 +117,21 @@ def restore_database(db, parent=None): _('Restoring database failed, click Show details to see details'), det_msg=r.tb, show=True) else: - if r.errors_occurred: - warning_dialog(parent, _('Success'), - _('Restoring the database succeeded with some warnings' - ' click Show details to see the details.'), - det_msg=r.report, show=True) - else: - info_dialog(parent, _('Success'), - _('Restoring database was successful'), show=True, - show_copy_button=False) + _show_success_msg(r, parent=parent) return True +def repair_library_at(library_path, parent=None): + d = DBRestore(parent, library_path) + d.exec_() + if d.rejected: + return False + r = d.restorer + if r.tb is not None: + error_dialog(parent, _('Failed'), + _('Restoring database failed, click Show details to see details'), + det_msg=r.tb, show=True) + return False + _show_success_msg(r, parent=parent) + return True + + diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 76b734a718..81324d31f8 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -4,16 +4,15 @@ import sys, os, time, socket, traceback from functools import partial -from PyQt4.Qt import QCoreApplication, QIcon, QObject, QTimer, \ - QThread, pyqtSignal, Qt, QProgressDialog, QString, QPixmap, \ - QSplashScreen, QApplication +from PyQt4.Qt import (QCoreApplication, QIcon, QObject, QTimer, + QPixmap, QSplashScreen, QApplication) -from calibre import prints, plugins -from calibre.constants import iswindows, __appname__, isosx, DEBUG, \ - filesystem_encoding +from calibre import prints, plugins, force_unicode +from calibre.constants import (iswindows, __appname__, isosx, DEBUG, + filesystem_encoding) from calibre.utils.ipc import ADDRESS, RC -from calibre.gui2 import ORG_NAME, APP_UID, initialize_file_icon_provider, \ - Application, choose_dir, error_dialog, question_dialog, gprefs +from calibre.gui2 import (ORG_NAME, APP_UID, initialize_file_icon_provider, + Application, choose_dir, 
error_dialog, question_dialog, gprefs) from calibre.gui2.main_window import option_parser as _option_parser from calibre.utils.config import prefs, dynamic from calibre.library.database2 import LibraryDatabase2 @@ -110,36 +109,9 @@ def get_library_path(parent=None): default_dir=get_default_library_path()) return library_path -class DBRepair(QThread): - - repair_done = pyqtSignal(object, object) - progress = pyqtSignal(object, object) - - def __init__(self, library_path, parent, pd): - QThread.__init__(self, parent) - self.library_path = library_path - self.pd = pd - self.progress.connect(self._callback, type=Qt.QueuedConnection) - - def _callback(self, num, is_length): - if is_length: - self.pd.setRange(0, num-1) - num = 0 - self.pd.setValue(num) - - def callback(self, num, is_length): - self.progress.emit(num, is_length) - - def run(self): - from calibre.debug import reinit_db - try: - reinit_db(os.path.join(self.library_path, 'metadata.db'), - self.callback) - db = LibraryDatabase2(self.library_path) - tb = None - except: - db, tb = None, traceback.format_exc() - self.repair_done.emit(db, tb) +def repair_library(library_path): + from calibre.gui2.dialogs.restore_library import repair_library_at + return repair_library_at(library_path) class GuiRunner(QObject): '''Make sure an event loop is running before starting the main work of @@ -184,9 +156,6 @@ def initialization_failed(self): raise SystemExit(1) def initialize_db_stage2(self, db, tb): - repair_pd = getattr(self, 'repair_pd', None) - if repair_pd is not None: - repair_pd.cancel() if db is None and tb is not None: # DB Repair failed @@ -219,23 +188,16 @@ def initialize_db(self): db = LibraryDatabase2(self.library_path) except (sqlite.Error, DatabaseException): repair = question_dialog(self.splash_screen, _('Corrupted database'), - _('Your calibre database appears to be corrupted. Do ' - 'you want calibre to try and repair it automatically? 
' - 'If you say No, a new empty calibre library will be created.'), + _('The library database at %s appears to be corrupted. Do ' + 'you want calibre to try and rebuild it automatically? ' + 'The rebuild may not be completely successful. ' + 'If you say No, a new empty calibre library will be created.') + % force_unicode(self.library_path, filesystem_encoding), det_msg=traceback.format_exc() ) if repair: - self.repair_pd = QProgressDialog(_('Repairing database. This ' - 'can take a very long time for a large collection'), QString(), - 0, 0) - self.repair_pd.setWindowModality(Qt.WindowModal) - self.repair_pd.show() - - self.repair = DBRepair(self.library_path, self, self.repair_pd) - self.repair.repair_done.connect(self.initialize_db_stage2, - type=Qt.QueuedConnection) - self.repair.start() - return + if repair_library(self.library_path): + db = LibraryDatabase2(self.library_path) except: error_dialog(self.splash_screen, _('Bad database location'), _('Bad database location %r. Will start with ' diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 928e6c63ab..c12a15829a 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -18,8 +18,8 @@ QMenu, QIcon, pyqtSignal, QUrl, QDialog, QSystemTrayIcon, QApplication) -from calibre import prints -from calibre.constants import __appname__, isosx +from calibre import prints, force_unicode +from calibre.constants import __appname__, isosx, filesystem_encoding from calibre.utils.config import prefs, dynamic from calibre.utils.ipc.server import Server from calibre.library.database2 import LibraryDatabase2 @@ -41,7 +41,7 @@ from calibre.gui2.search_restriction_mixin import SearchRestrictionMixin from calibre.gui2.tag_browser.ui import TagBrowserMixin from calibre.gui2.keyboard import Manager - +from calibre.library.sqlite import sqlite, DatabaseException class Listener(Thread): # {{{ @@ -475,7 +475,8 @@ def current_view(self): def booklists(self): return self.memory_view.model().db, 
self.card_a_view.model().db, self.card_b_view.model().db - def library_moved(self, newloc, copy_structure=False, call_close=True): + def library_moved(self, newloc, copy_structure=False, call_close=True, + allow_rebuild=False): if newloc is None: return default_prefs = None try: @@ -484,7 +485,26 @@ def library_moved(self, newloc, copy_structure=False, call_close=True): default_prefs = olddb.prefs except: olddb = None - db = LibraryDatabase2(newloc, default_prefs=default_prefs) + try: + db = LibraryDatabase2(newloc, default_prefs=default_prefs) + except (DatabaseException, sqlite.Error): + if not allow_rebuild: raise + import traceback + repair = question_dialog(self, _('Corrupted database'), + _('The library database at %s appears to be corrupted. Do ' + 'you want calibre to try and rebuild it automatically? ' + 'The rebuild may not be completely successful.') + % force_unicode(newloc, filesystem_encoding), + det_msg=traceback.format_exc() + ) + if repair: + from calibre.gui2.dialogs.restore_library import repair_library_at + if repair_library_at(newloc, parent=self): + db = LibraryDatabase2(newloc, default_prefs=default_prefs) + else: + return + else: + return if self.content_server is not None: self.content_server.set_database(db) self.library_path = newloc