'
__docformat__ = 'restructuredtext en'
-import sys, cPickle, shutil
+import sys, cPickle, shutil, importlib
from PyQt4.Qt import QString, SIGNAL, QAbstractListModel, Qt, QVariant, QFont
@@ -182,8 +182,8 @@ def widget_factory(cls):
output_widget = None
name = self.plumber.output_plugin.name.lower().replace(' ', '_')
try:
- output_widget = __import__('calibre.gui2.convert.'+name,
- fromlist=[1])
+ output_widget = importlib.import_module(
+ 'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
pw.ICON = I('back.png')
pw.HELP = _('Options specific to the output format.')
@@ -193,8 +193,8 @@ def widget_factory(cls):
input_widget = None
name = self.plumber.input_plugin.name.lower().replace(' ', '_')
try:
- input_widget = __import__('calibre.gui2.convert.'+name,
- fromlist=[1])
+ input_widget = importlib.import_module(
+ 'calibre.gui2.convert.'+name)
pw = input_widget.PluginWidget
pw.ICON = I('forward.png')
pw.HELP = _('Options specific to the input format.')
diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py
index beaca77a38..10602fb28c 100644
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@@ -226,10 +226,18 @@ def getter(self):
class Text(Base):
def setup_ui(self, parent):
+ if self.col_metadata['display'].get('is_names', False):
+ self.sep = u' & '
+ else:
+ self.sep = u', '
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(key=sort_key)
if self.col_metadata['is_multiple']:
w = MultiCompleteLineEdit(parent)
+ w.set_separator(self.sep.strip())
+ if self.sep == u' & ':
+ w.set_space_before_sep(True)
+ w.set_add_separator(tweaks['authors_completer_append_separator'])
w.update_items_cache(values)
w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
else:
@@ -261,12 +269,12 @@ def setter(self, val):
if self.col_metadata['is_multiple']:
if not val:
val = []
- self.widgets[1].setText(u', '.join(val))
+ self.widgets[1].setText(self.sep.join(val))
def getter(self):
if self.col_metadata['is_multiple']:
val = unicode(self.widgets[1].text()).strip()
- ans = [x.strip() for x in val.split(',') if x.strip()]
+ ans = [x.strip() for x in val.split(self.sep.strip()) if x.strip()]
if not ans:
ans = None
return ans
@@ -847,13 +855,20 @@ def setup_ui(self, parent):
self.main_widget.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
self.adding_widget = self.main_widget
- w = RemoveTags(parent, values)
- self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' +
- _('tags to remove'), parent))
- self.widgets.append(w)
- self.removing_widget = w
- w.tags_box.textChanged.connect(self.a_c_checkbox_changed)
- w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
+ if not self.col_metadata['display'].get('is_names', False):
+ w = RemoveTags(parent, values)
+ self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' +
+ _('tags to remove'), parent))
+ self.widgets.append(w)
+ self.removing_widget = w
+ self.main_widget.set_separator(',')
+ w.tags_box.textChanged.connect(self.a_c_checkbox_changed)
+ w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
+ else:
+ self.main_widget.set_separator('&')
+ self.main_widget.set_space_before_sep(True)
+ self.main_widget.set_add_separator(
+ tweaks['authors_completer_append_separator'])
else:
self.make_widgets(parent, MultiCompleteComboBox)
self.main_widget.set_separator(None)
@@ -882,21 +897,26 @@ def commit(self, book_ids, notify=False):
if not self.a_c_checkbox.isChecked():
return
if self.col_metadata['is_multiple']:
- remove_all, adding, rtext = self.gui_val
- remove = set()
- if remove_all:
- remove = set(self.db.all_custom(num=self.col_id))
+ if self.col_metadata['display'].get('is_names', False):
+ val = self.gui_val
+ add = [v.strip() for v in val.split('&') if v.strip()]
+ self.db.set_custom_bulk(book_ids, add, num=self.col_id)
else:
- txt = rtext
+ remove_all, adding, rtext = self.gui_val
+ remove = set()
+ if remove_all:
+ remove = set(self.db.all_custom(num=self.col_id))
+ else:
+ txt = rtext
+ if txt:
+ remove = set([v.strip() for v in txt.split(',')])
+ txt = adding
if txt:
- remove = set([v.strip() for v in txt.split(',')])
- txt = adding
- if txt:
- add = set([v.strip() for v in txt.split(',')])
- else:
- add = set()
- self.db.set_custom_bulk_multiple(book_ids, add=add, remove=remove,
- num=self.col_id)
+ add = set([v.strip() for v in txt.split(',')])
+ else:
+ add = set()
+ self.db.set_custom_bulk_multiple(book_ids, add=add,
+ remove=remove, num=self.col_id)
else:
val = self.gui_val
val = self.normalize_ui_val(val)
@@ -905,10 +925,11 @@ def commit(self, book_ids, notify=False):
def getter(self):
if self.col_metadata['is_multiple']:
- return self.removing_widget.checkbox.isChecked(), \
- unicode(self.adding_widget.text()), \
- unicode(self.removing_widget.tags_box.text())
-
+ if not self.col_metadata['display'].get('is_names', False):
+ return self.removing_widget.checkbox.isChecked(), \
+ unicode(self.adding_widget.text()), \
+ unicode(self.removing_widget.tags_box.text())
+ return unicode(self.adding_widget.text())
val = unicode(self.main_widget.currentText()).strip()
if not val:
val = None
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 215e67c46f..ab2177cef1 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -64,7 +64,7 @@ def run(self):
self.result = self.func(*self.args, **self.kwargs)
if self._aborted:
return
- except (Exception, SystemExit), err:
+ except (Exception, SystemExit) as err:
if self._aborted:
return
self.failed = True
@@ -162,7 +162,7 @@ def do_connect(self, connected_devices, device_kind):
dev.reset(detected_device=detected_device,
report_progress=self.report_progress)
dev.open(self.current_library_uuid)
- except OpenFeedback, e:
+ except OpenFeedback as e:
if dev not in self.ejected_devices:
self.open_feedback_msg(dev.get_gui_name(), e.feedback_msg)
self.ejected_devices.add(dev)
diff --git a/src/calibre/gui2/device_drivers/configwidget.py b/src/calibre/gui2/device_drivers/configwidget.py
index 97c492b550..fc7e16e639 100644
--- a/src/calibre/gui2/device_drivers/configwidget.py
+++ b/src/calibre/gui2/device_drivers/configwidget.py
@@ -133,7 +133,7 @@ def validate(self):
try:
validation_formatter.validate(tmpl)
return True
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid template'),
''+_('The template %s is invalid:')%tmpl + \
'
'+unicode(err), show=True)
diff --git a/src/calibre/gui2/dialogs/catalog.py b/src/calibre/gui2/dialogs/catalog.py
index ebca7235eb..a8f7ed160f 100644
--- a/src/calibre/gui2/dialogs/catalog.py
+++ b/src/calibre/gui2/dialogs/catalog.py
@@ -6,7 +6,7 @@
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, sys
+import os, sys, importlib
from calibre.customize.ui import config
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
@@ -43,8 +43,7 @@ def __init__(self, parent, dbspec, ids, db):
name = plugin.name.lower().replace(' ', '_')
if type(plugin) in builtin_plugins:
try:
- catalog_widget = __import__('calibre.gui2.catalog.'+name,
- fromlist=[1])
+ catalog_widget = importlib.import_module('calibre.gui2.catalog.'+name)
pw = catalog_widget.PluginWidget()
pw.initialize(name, db)
pw.ICON = I('forward.png')
@@ -75,7 +74,7 @@ def __init__(self, parent, dbspec, ids, db):
# Import the dynamic PluginWidget() from .py file provided in plugin.zip
try:
sys.path.insert(0, plugin.resources_path)
- catalog_widget = __import__(name, fromlist=[1])
+ catalog_widget = importlib.import_module(name)
pw = catalog_widget.PluginWidget()
pw.initialize(name)
pw.ICON = I('forward.png')
diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py
index 560090d2b3..95f99d4034 100644
--- a/src/calibre/gui2/dialogs/check_library.py
+++ b/src/calibre/gui2/dialogs/check_library.py
@@ -68,7 +68,7 @@ def do_one_dump(self):
self.start_load()
return
QTimer.singleShot(0, self.do_one_dump)
- except Exception, e:
+ except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
@@ -90,7 +90,7 @@ def start_load(self):
self.conn.commit()
QTimer.singleShot(0, self.do_one_load)
- except Exception, e:
+ except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
@@ -111,7 +111,7 @@ def do_one_load(self):
self.pb.setValue(self.pb.value() + 1)
self.count -= 1
QTimer.singleShot(0, self.do_one_load)
- except Exception, e:
+ except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 9b25545252..0683f2cb91 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -120,7 +120,7 @@ def do_one_safe(self):
self.msg.setText(self.msg_text.format(self.phases[self.current_phase],
percent))
self.do_one(id)
- except Exception, err:
+ except Exception as err:
import traceback
try:
err = unicode(err)
@@ -653,7 +653,10 @@ def s_r_do_destination(self, mi, val):
if self.destination_field_fm['is_multiple']:
if self.comma_separated.isChecked():
- if dest == 'authors':
+ if dest == 'authors' or \
+ (self.destination_field_fm['is_custom'] and
+ self.destination_field_fm['datatype'] == 'text' and
+ self.destination_field_fm['display'].get('is_names', False)):
splitter = ' & '
else:
splitter = ','
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 9efe7f7160..f6b7b94453 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -76,7 +76,7 @@ def run(self):
self.cover_data, self.errors = download_cover(mi,
timeout=self.timeout)
- except Exception, e:
+ except Exception as e:
self.exception = e
self.traceback = traceback.format_exc()
print self.traceback
@@ -183,7 +183,7 @@ def select_cover(self, checked):
try:
cf = open(_file, "rb")
cover = cf.read()
- except IOError, e:
+ except IOError as e:
d = error_dialog(self, _('Error reading file'),
_("There was an error reading from file:
") + _file + "
"+str(e))
d.exec_()
diff --git a/src/calibre/gui2/dialogs/tag_editor.py b/src/calibre/gui2/dialogs/tag_editor.py
index 6bd8eb7dbe..bf3bb9fd4e 100644
--- a/src/calibre/gui2/dialogs/tag_editor.py
+++ b/src/calibre/gui2/dialogs/tag_editor.py
@@ -122,6 +122,8 @@ def add_tag(self):
tags = unicode(self.add_tag_input.text()).split(',')
for tag in tags:
tag = tag.strip()
+ if not tag:
+ continue
for item in self.available_tags.findItems(tag, Qt.MatchFixedString):
self.available_tags.takeItem(self.available_tags.row(item))
if tag not in self.tags:
diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py
index 5453a90766..d66d02d211 100644
--- a/src/calibre/gui2/dialogs/user_profiles.py
+++ b/src/calibre/gui2/dialogs/user_profiles.py
@@ -237,7 +237,7 @@ def add_profile(self, clicked):
try:
compile_recipe(src)
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid input'),
_('Could not create recipe. Error:
%s')%str(err)).exec_()
return
@@ -246,7 +246,7 @@ def add_profile(self, clicked):
src = unicode(self.source_code.toPlainText())
try:
title = compile_recipe(src).title
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid input'),
_('
Could not create recipe. Error:
%s')%str(err)).exec_()
return
@@ -333,7 +333,7 @@ def load(self):
try:
profile = open(file, 'rb').read().decode('utf-8')
title = compile_recipe(profile).title
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid input'),
_('
Could not create recipe. Error:
%s')%str(err)).exec_()
return
diff --git a/src/calibre/gui2/dnd.py b/src/calibre/gui2/dnd.py
index 928de72578..1f9dbdfa34 100644
--- a/src/calibre/gui2/dnd.py
+++ b/src/calibre/gui2/dnd.py
@@ -35,7 +35,7 @@ def run(self):
try:
br = browser()
br.retrieve(self.url, self.fpath, self.callback)
- except Exception, e:
+ except Exception as e:
self.err = as_unicode(e)
import traceback
self.tb = traceback.format_exc()
diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py
index c84b3180f7..81c1d9c255 100644
--- a/src/calibre/gui2/email.py
+++ b/src/calibre/gui2/email.py
@@ -116,7 +116,7 @@ def run(self):
try:
self.sendmail(job)
break
- except Exception, e:
+ except Exception as e:
if not self._run:
return
import traceback
diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py
index 3a090f8102..0f74500099 100644
--- a/src/calibre/gui2/library/delegates.py
+++ b/src/calibre/gui2/library/delegates.py
@@ -398,7 +398,7 @@ def setModelData(self, editor, model, index):
val = unicode(editor.textbox.toPlainText())
try:
validation_formatter.validate(val)
- except Exception, err:
+ except Exception as err:
error_dialog(self.parent(), _('Invalid template'),
'
'+_('The template %s is invalid:')%val + \
'
'+str(err), show=True)
diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index a200562ea9..c921ea125f 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -640,18 +640,18 @@ def ondevice_decorator(r, idx=-1):
return self.bool_yes_icon
return self.bool_blank_icon
- def text_type(r, mult=False, idx=-1):
+ def text_type(r, mult=None, idx=-1):
text = self.db.data[r][idx]
- if text and mult:
- return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
+ if text and mult is not None:
+ if mult:
+ return QVariant(u' & '.join(text.split('|')))
+ return QVariant(u', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
- def decorated_text_type(r, mult=False, idx=-1):
+ def decorated_text_type(r, idx=-1):
text = self.db.data[r][idx]
if force_to_bool(text) is not None:
return None
- if text and mult:
- return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
def number_type(r, idx=-1):
@@ -659,7 +659,7 @@ def number_type(r, idx=-1):
self.dc = {
'title' : functools.partial(text_type,
- idx=self.db.field_metadata['title']['rec_index'], mult=False),
+ idx=self.db.field_metadata['title']['rec_index'], mult=None),
'authors' : functools.partial(authors,
idx=self.db.field_metadata['authors']['rec_index']),
'size' : functools.partial(size,
@@ -671,14 +671,14 @@ def number_type(r, idx=-1):
'rating' : functools.partial(rating_type,
idx=self.db.field_metadata['rating']['rec_index']),
'publisher': functools.partial(text_type,
- idx=self.db.field_metadata['publisher']['rec_index'], mult=False),
+ idx=self.db.field_metadata['publisher']['rec_index'], mult=None),
'tags' : functools.partial(tags,
idx=self.db.field_metadata['tags']['rec_index']),
'series' : functools.partial(series_type,
idx=self.db.field_metadata['series']['rec_index'],
siix=self.db.field_metadata['series_index']['rec_index']),
'ondevice' : functools.partial(text_type,
- idx=self.db.field_metadata['ondevice']['rec_index'], mult=False),
+ idx=self.db.field_metadata['ondevice']['rec_index'], mult=None),
}
self.dc_decorator = {
@@ -692,11 +692,12 @@ def number_type(r, idx=-1):
datatype = self.custom_columns[col]['datatype']
if datatype in ('text', 'comments', 'composite', 'enumeration'):
mult=self.custom_columns[col]['is_multiple']
+ if mult is not None:
+ mult = self.custom_columns[col]['display'].get('is_names', False)
self.dc[col] = functools.partial(text_type, idx=idx, mult=mult)
if datatype in ['text', 'composite', 'enumeration'] and not mult:
if self.custom_columns[col]['display'].get('use_decorations', False):
- self.dc[col] = functools.partial(decorated_text_type,
- idx=idx, mult=mult)
+ self.dc[col] = functools.partial(decorated_text_type, idx=idx)
self.dc_decorator[col] = functools.partial(
bool_type_decorator, idx=idx,
bool_cols_are_tristate=
diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index c62936a46f..0cce33da9e 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -78,6 +78,7 @@ def __init__(self, parent, modelcls=BooksModel):
self.pubdate_delegate = PubDateDelegate(self)
self.tags_delegate = CompleteDelegate(self, ',', 'all_tags')
self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True)
+ self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True)
self.series_delegate = TextDelegate(self)
self.publisher_delegate = TextDelegate(self)
self.text_delegate = TextDelegate(self)
@@ -410,6 +411,7 @@ def set_database(self, db):
self.save_state()
self._model.set_database(db)
self.tags_delegate.set_database(db)
+ self.cc_names_delegate.set_database(db)
self.authors_delegate.set_database(db)
self.series_delegate.set_auto_complete_function(db.all_series)
self.publisher_delegate.set_auto_complete_function(db.all_publishers)
@@ -431,12 +433,17 @@ def database_changed(self, db):
self.setItemDelegateForColumn(cm.index(colhead), delegate)
elif cc['datatype'] == 'comments':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_comments_delegate)
- elif cc['datatype'] in ('text', 'series'):
+ elif cc['datatype'] == 'text':
if cc['is_multiple']:
- self.setItemDelegateForColumn(cm.index(colhead), self.tags_delegate)
+ if cc['display'].get('is_names', False):
+ self.setItemDelegateForColumn(cm.index(colhead),
+ self.cc_names_delegate)
+ else:
+ self.setItemDelegateForColumn(cm.index(colhead),
+ self.tags_delegate)
else:
self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate)
- elif cc['datatype'] in ('int', 'float'):
+ elif cc['datatype'] in ('series', 'int', 'float'):
self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate)
elif cc['datatype'] == 'bool':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_bool_delegate)
diff --git a/src/calibre/gui2/lrf_renderer/main.py b/src/calibre/gui2/lrf_renderer/main.py
index 2acfd3c9a7..e68e04adcf 100644
--- a/src/calibre/gui2/lrf_renderer/main.py
+++ b/src/calibre/gui2/lrf_renderer/main.py
@@ -35,7 +35,7 @@ def run(self):
self.stream = None
if self.aborted:
self.lrf = None
- except Exception, err:
+ except Exception as err:
self.lrf, self.stream = None, None
self.exception = err
self.formatted_traceback = traceback.format_exc()
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index 976b679726..c67ec8c2b4 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -399,7 +399,7 @@ def main(args=sys.argv):
if __name__ == '__main__':
try:
sys.exit(main())
- except Exception, err:
+ except Exception as err:
if not iswindows: raise
tb = traceback.format_exc()
from PyQt4.QtGui import QErrorMessage
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index d5a8de7b67..635a037482 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -656,7 +656,7 @@ def select_cover(self, *args):
try:
cf = open(_file, "rb")
cover = cf.read()
- except IOError, e:
+ except IOError as e:
d = error_dialog(self, _('Error reading file'),
_("
There was an error reading from file:
")
+ _file + "
"+str(e))
diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py
index 461f56b60c..7a7f49dabf 100644
--- a/src/calibre/gui2/metadata/bulk_download.py
+++ b/src/calibre/gui2/metadata/bulk_download.py
@@ -88,7 +88,7 @@ def __init__(self, db, ids, get_covers, set_metadata=True,
def run(self):
try:
self._run()
- except Exception, e:
+ except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index 3b6dd0e253..5b17b454e7 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -303,7 +303,7 @@ def apply_changes(self):
return False
self.books_to_refresh |= getattr(widget, 'books_to_refresh',
set([]))
- except IOError, err:
+ except IOError as err:
if err.errno == 13: # Permission denied
import traceback
fname = err.filename if err.filename else 'file'
diff --git a/src/calibre/gui2/notify.py b/src/calibre/gui2/notify.py
index 501f7007eb..947d98f1a4 100644
--- a/src/calibre/gui2/notify.py
+++ b/src/calibre/gui2/notify.py
@@ -34,7 +34,7 @@ def __init__(self, server, path, interface):
import dbus
self.dbus = dbus
self._notify = dbus.Interface(dbus.SessionBus().get_object(server, path), interface)
- except Exception, err:
+ except Exception as err:
self.ok = False
self.err = str(err)
diff --git a/src/calibre/gui2/preferences/conversion.py b/src/calibre/gui2/preferences/conversion.py
index 8de9ee1661..b5240227d3 100644
--- a/src/calibre/gui2/preferences/conversion.py
+++ b/src/calibre/gui2/preferences/conversion.py
@@ -5,6 +5,8 @@
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+import importlib
+
from PyQt4.Qt import QIcon, Qt, QStringListModel, QVariant
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit
@@ -104,8 +106,8 @@ def load_conversion_widgets(self):
for plugin in output_format_plugins():
name = plugin.name.lower().replace(' ', '_')
try:
- output_widget = __import__('calibre.gui2.convert.'+name,
- fromlist=[1])
+ output_widget = importlib.import_module(
+ 'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
self.conversion_widgets.append(pw)
except ImportError:
diff --git a/src/calibre/gui2/preferences/create_custom_column.py b/src/calibre/gui2/preferences/create_custom_column.py
index cee34f150e..f476845f8b 100644
--- a/src/calibre/gui2/preferences/create_custom_column.py
+++ b/src/calibre/gui2/preferences/create_custom_column.py
@@ -63,7 +63,7 @@ def __init__(self, parent, editing, standard_colheads, standard_colnames):
for col, name in [('isbn', _('ISBN')), ('formats', _('Formats')),
('last_modified', _('Modified Date')), ('yesno', _('Yes/No')),
('tags', _('Tags')), ('series', _('Series')), ('rating',
- _('Rating'))]:
+ _('Rating')), ('people', _("People's names"))]:
text += ' %s,'%(col, name)
text = text[:-1]
self.shortcuts.setText(text)
@@ -125,6 +125,8 @@ def __init__(self, parent, editing, standard_colheads, standard_colnames):
self.datatype_changed()
if ct in ['text', 'composite', 'enumeration']:
self.use_decorations.setChecked(c['display'].get('use_decorations', False))
+ elif ct == '*text':
+ self.is_names.setChecked(c['display'].get('is_names', False))
self.exec_()
def shortcut_activated(self, url):
@@ -134,6 +136,7 @@ def shortcut_activated(self, url):
'tags' : 1,
'series': 3,
'rating': 8,
+ 'people': 1,
}.get(which, 10))
self.column_name_box.setText(which)
self.column_heading_box.setText({
@@ -143,7 +146,9 @@ def shortcut_activated(self, url):
'tags': _('My Tags'),
'series': _('My Series'),
'rating': _('My Rating'),
- 'last_modified':_('Modified Date')}[which])
+ 'last_modified':_('Modified Date'),
+ 'people': _('People')}[which])
+ self.is_names.setChecked(which == 'people')
if self.composite_box.isVisible():
self.composite_box.setText(
{
@@ -153,7 +158,6 @@ def shortcut_activated(self, url):
}[which])
self.composite_sort_by.setCurrentIndex(2 if which == 'last_modified' else 0)
-
def datatype_changed(self, *args):
try:
col_type = self.column_types[self.column_type_box.currentIndex()]['datatype']
@@ -167,6 +171,7 @@ def datatype_changed(self, *args):
for x in ('box', 'default_label', 'label'):
getattr(self, 'enum_'+x).setVisible(col_type == 'enumeration')
self.use_decorations.setVisible(col_type in ['text', 'composite', 'enumeration'])
+ self.is_names.setVisible(col_type == '*text')
def accept(self):
col = unicode(self.column_name_box.text()).strip()
@@ -241,6 +246,8 @@ def accept(self):
return self.simple_error('', _('The value "{0}" is in the '
'list more than once').format(l[i]))
display_dict = {'enum_values': l}
+ elif col_type == 'text' and is_multiple:
+ display_dict = {'is_names': self.is_names.isChecked()}
if col_type in ['text', 'composite', 'enumeration']:
display_dict['use_decorations'] = self.use_decorations.checkState()
diff --git a/src/calibre/gui2/preferences/create_custom_column.ui b/src/calibre/gui2/preferences/create_custom_column.ui
index 3290d3c846..619b0c6212 100644
--- a/src/calibre/gui2/preferences/create_custom_column.ui
+++ b/src/calibre/gui2/preferences/create_custom_column.ui
@@ -9,7 +9,7 @@
0
0
- 603
+ 831
344
@@ -110,27 +110,37 @@
-
-
- Show checkmarks
-
Show check marks in the GUI. Values of 'yes', 'checked', and 'true'
will show a green check. Values of 'no', 'unchecked', and 'false' will show a red X.
Everything else will show nothing.
+
+ Show checkmarks
+
+
+
+ -
+
+
+ Check this box if this column contains names, like the authors column.
+
+
+ Contains names
+
-
-
- Qt::Horizontal
-
10
0
+
+ Qt::Horizontal
+
20
@@ -241,25 +251,25 @@ Everything else will show nothing.
-
-
- Show in tags browser
-
If checked, this column will appear in the tags browser as a category
+
+ Show in tags browser
+
-
-
- Qt::Horizontal
-
10
0
+
+ Qt::Horizontal
+
20
diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py
index 206f2b97fb..a2d2236039 100644
--- a/src/calibre/gui2/preferences/look_feel.py
+++ b/src/calibre/gui2/preferences/look_feel.py
@@ -64,8 +64,9 @@ def genesis(self, gui):
r('tags_browser_collapse_at', gprefs)
choices = set([k for k in db.field_metadata.all_field_keys()
- if db.field_metadata[k]['is_category'] and
- db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']])
+ if db.field_metadata[k]['is_category'] and
+ (db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']) and
+ not db.field_metadata[k]['display'].get('is_names', False)])
choices -= set(['authors', 'publisher', 'formats', 'news', 'identifiers'])
choices |= set(['search'])
self.opt_categories_using_hierarchy.update_items_cache(choices)
diff --git a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py
index e1dc6b03bd..8f2b084d76 100644
--- a/src/calibre/gui2/preferences/plugboard.py
+++ b/src/calibre/gui2/preferences/plugboard.py
@@ -251,7 +251,7 @@ def ok_clicked(self):
if d != 0:
try:
validation_formatter.validate(s)
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid template'),
'
'+_('The template %s is invalid:')%s + \
'
'+str(err), show=True)
diff --git a/src/calibre/gui2/preferences/save_template.py b/src/calibre/gui2/preferences/save_template.py
index 4c00a14c0f..96ca8c8945 100644
--- a/src/calibre/gui2/preferences/save_template.py
+++ b/src/calibre/gui2/preferences/save_template.py
@@ -57,7 +57,7 @@ def validate(self):
return question_dialog(self, _('Constant template'),
_('The template contains no {fields}, so all '
'books will have the same name. Is this OK?'))
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid template'),
'
'+_('The template %s is invalid:')%tmpl + \
'
'+str(err), show=True)
diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index 34fa3a8b10..6b1ce2f851 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -658,8 +658,7 @@ def category_data(self, role):
def tag_data(self, role):
tag = self.tag
- if tag.category == 'authors' and \
- tweaks['categories_use_field_for_author_name'] == 'author_sort':
+ if tag.use_sort_as_name:
name = tag.sort
tt_author = True
else:
@@ -1275,6 +1274,7 @@ def process_one_node(category, state_map): # {{{
if len(components) == 0 or '.'.join(components) != tag.original_name:
components = [tag.original_name]
if (not tag.is_hierarchical) and (in_uc or
+ (fm['is_custom'] and fm['display'].get('is_names', False)) or
key in ['authors', 'publisher', 'news', 'formats', 'rating'] or
key not in self.db.prefs.get('categories_using_hierarchy', []) or
len(components) == 1):
diff --git a/src/calibre/gui2/viewer/dictionary.py b/src/calibre/gui2/viewer/dictionary.py
index dad8d1821c..d5dd4d0a86 100644
--- a/src/calibre/gui2/viewer/dictionary.py
+++ b/src/calibre/gui2/viewer/dictionary.py
@@ -36,7 +36,7 @@ def define(self):
def run(self):
try:
self.define()
- except Exception, e:
+ except Exception as e:
import traceback
self.exception = e
self.traceback = traceback.format_exc()
diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py
index c570a6e159..ea0509b51a 100644
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@@ -97,7 +97,7 @@ def initialize(self, defaults=False):
def do_test(self):
try:
pat = self.pattern()
- except Exception, err:
+ except Exception as err:
error_dialog(self, _('Invalid regular expression'),
_('Invalid regular expression: %s')%err).exec_()
return
diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py
index c629b10b5d..a32347dc72 100644
--- a/src/calibre/gui2/wizard/__init__.py
+++ b/src/calibre/gui2/wizard/__init__.py
@@ -565,7 +565,7 @@ def move_library(oldloc, newloc, parent, callback_on_complete):
# Try to load existing library at new location
try:
LibraryDatabase2(newloc)
- except Exception, err:
+ except Exception as err:
det = traceback.format_exc()
error_dialog(parent, _('Invalid database'),
_('
An invalid library already exists at '
@@ -577,7 +577,7 @@ def move_library(oldloc, newloc, parent, callback_on_complete):
else:
callback(newloc)
return
- except Exception, err:
+ except Exception as err:
det = traceback.format_exc()
error_dialog(parent, _('Could not move library'),
unicode(err), det, show=True)
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 19ef7e213c..e5864ceaaf 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -15,7 +15,7 @@
from calibre.utils.date import parse_date, now, UNDEFINED_DATE
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException
-from calibre.ebooks.metadata import title_sort
+from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints
@@ -1023,7 +1023,11 @@ def itervals(self, record):
if val:
sep = fm['is_multiple']
if sep:
- val = sep.join(sorted(val.split(sep),
+ if fm['display'].get('is_names', False):
+ val = sep.join(
+ [author_to_author_sort(v) for v in val.split(sep)])
+ else:
+ val = sep.join(sorted(val.split(sep),
key=self.string_sort_key))
val = self.string_sort_key(val)
diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py
index dec55f2b02..48960ac871 100644
--- a/src/calibre/library/custom_columns.py
+++ b/src/calibre/library/custom_columns.py
@@ -117,7 +117,7 @@ def adapt_text(x, d):
if x is None:
return []
if isinstance(x, (str, unicode, bytes)):
- x = x.split(',')
+ x = x.split('&' if d['display'].get('is_names', False) else',')
x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
unicode) else y for y in x]
@@ -482,8 +482,11 @@ def _set_custom(self, id_, val, label=None, num=None, append=False,
set_val = val if data['is_multiple'] else [val]
existing = getter()
if not existing:
- existing = []
- for x in set(set_val) - set(existing):
+ existing = set([])
+ else:
+ existing = set(existing)
+ # preserve the order in set_val
+ for x in [v for v in set_val if v not in existing]:
# normalized types are text and ratings, so we can do this check
# to see if we need to re-add the value
if not x:
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index e751d4d522..b23c8ff4a4 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -48,7 +48,7 @@ class Tag(object):
def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None,
tooltip=None, icon=None, category=None, id_set=None,
- is_editable = True, is_searchable=True):
+ is_editable = True, is_searchable=True, use_sort_as_name=False):
self.name = self.original_name = name
self.id = id
self.count = count
@@ -59,6 +59,7 @@ def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None,
self.id_set = id_set if id_set is not None else set([])
self.avg_rating = avg/2.0 if avg is not None else 0
self.sort = sort
+ self.use_sort_as_name = use_sort_as_name
if self.avg_rating > 0:
if tooltip:
tooltip = tooltip + ': '
@@ -1323,6 +1324,11 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
for l in list:
(id, val) = (l[0], l[1])
tids[category][val] = (id, '{0:05.2f}'.format(val))
+ elif cat['datatype'] == 'text' and cat['is_multiple'] and \
+ cat['display'].get('is_names', False):
+ for l in list:
+ (id, val) = (l[0], l[1])
+ tids[category][val] = (id, author_to_author_sort(val))
else:
for l in list:
(id, val) = (l[0], l[1])
@@ -1480,11 +1486,20 @@ def get_categories(self, sort='name', ids=None, icon_map=None):
reverse=True
items.sort(key=kf, reverse=reverse)
+ if tweaks['categories_use_field_for_author_name'] == 'author_sort' and\
+ (category == 'authors' or
+ (cat['display'].get('is_names', False) and
+ cat['is_custom'] and cat['is_multiple'] and
+ cat['datatype'] == 'text')):
+ use_sort_as_name = True
+ else:
+ use_sort_as_name = False
is_editable = category not in ['news', 'rating']
categories[category] = [tag_class(formatter(r.n), count=r.c, id=r.id,
avg=avgr(r), sort=r.s, icon=icon,
tooltip=tooltip, category=category,
- id_set=r.id_set, is_editable=is_editable)
+ id_set=r.id_set, is_editable=is_editable,
+ use_sort_as_name=use_sort_as_name)
for r in items]
#print 'end phase "tags list":', time.clock() - last, 'seconds'
diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py
index 83d395dec5..dba6abbfa5 100644
--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@@ -222,7 +222,7 @@ def start(self):
# cherrypy.engine.signal_handler.subscribe()
cherrypy.engine.block()
- except Exception, e:
+ except Exception as e:
self.exception = e
finally:
self.is_running = False
diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index f1d9b9785c..895fbb06e9 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -15,7 +15,7 @@
prepare_string_for_xml
from calibre.utils.ordered_dict import OrderedDict
from calibre.utils.filenames import ascii_filename
-from calibre.utils.config import prefs, tweaks
+from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
from calibre.utils.magick import Image
from calibre.library.comments import comments_to_html
@@ -155,8 +155,7 @@ def item(i):
'
{1}
'
'{2}
')
rating, rstring = render_rating(i.avg_rating, prefix)
- if i.category == 'authors' and \
- tweaks['categories_use_field_for_author_name'] == 'author_sort':
+ if i.use_sort_as_name:
name = xml(i.sort)
else:
name = xml(i.name)
@@ -696,7 +695,10 @@ def browse_get_book_args(self, mi, id_, add_category_links=False):
xml(href, True),
xml(val if len(dbtags) == 1 else tag.name),
xml(key, True)))
- join = ' & ' if key == 'authors' else ', '
+ join = ' & ' if key == 'authors' or \
+ (fm['is_custom'] and
+ fm['display'].get('is_names', False)) \
+ else ', '
args[key] = join.join(vals)
added_key = True
if not added_key:
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 11ea2b951e..919f5a7969 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -169,7 +169,7 @@ def get_cover(self, id, thumbnail=False, thumb_width=60, thumb_height=80):
return cover
return save_cover_data_to(img, 'img.jpg', return_data=True,
resize_to=(width, height))
- except Exception, err:
+ except Exception as err:
import traceback
cherrypy.log.error('Failed to generate cover:')
cherrypy.log.error(traceback.print_exc())
diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py
index e4de710c6a..3a6f918022 100644
--- a/src/calibre/library/server/main.py
+++ b/src/calibre/library/server/main.py
@@ -69,7 +69,7 @@ def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
if pid > 0:
# exit first parent
sys.exit(0)
- except OSError, e:
+ except OSError as e:
print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror)
sys.exit(1)
@@ -84,7 +84,7 @@ def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
if pid > 0:
# exit from second parent
sys.exit(0)
- except OSError, e:
+ except OSError as e:
print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror)
sys.exit(1)
diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py
index e7fdffbbbb..bdd35c16f1 100644
--- a/src/calibre/library/server/opds.py
+++ b/src/calibre/library/server/opds.py
@@ -22,7 +22,6 @@
from calibre import guess_type, prepare_string_for_xml as xml
from calibre.utils.icu import sort_key
from calibre.utils.ordered_dict import OrderedDict
-from calibre.utils.config import tweaks
BASE_HREFS = {
0 : '/stanza',
@@ -126,8 +125,7 @@ def CATALOG_ENTRY(item, item_kind, base_href, version, updated,
count = (_('%d books') if item.count > 1 else _('%d book'))%item.count
if ignore_count:
count = ''
- if item.category == 'authors' and \
- tweaks['categories_use_field_for_author_name'] == 'author_sort':
+ if item.use_sort_as_name:
name = item.sort
else:
name = item.name
diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py
index 2075ab5880..511106fe7b 100644
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@@ -193,7 +193,7 @@ def load_c_extensions(conn, debug=DEBUG):
conn.load_extension(ext_path)
conn.enable_load_extension(False)
return True
- except Exception, e:
+ except Exception as e:
if debug:
print 'Failed to load high performance sqlite C extension'
print e
@@ -247,14 +247,14 @@ def run(self):
if func == 'dump':
try:
ok, res = True, tuple(self.conn.iterdump())
- except Exception, err:
+ except Exception as err:
ok, res = False, (err, traceback.format_exc())
elif func == 'create_dynamic_filter':
try:
f = DynamicFilter(args[0])
self.conn.create_function(args[0], 1, f)
ok, res = True, f
- except Exception, err:
+ except Exception as err:
ok, res = False, (err, traceback.format_exc())
else:
bfunc = getattr(self.conn, func)
@@ -263,7 +263,7 @@ def run(self):
try:
ok, res = True, bfunc(*args, **kwargs)
break
- except OperationalError, err:
+ except OperationalError as err:
# Retry if unable to open db file
e = str(err)
if 'unable to open' not in e or i == 2:
@@ -273,10 +273,10 @@ def run(self):
reprlib.repr(kwargs))
raise
time.sleep(0.5)
- except Exception, err:
+ except Exception as err:
ok, res = False, (err, traceback.format_exc())
self.results.put((ok, res))
- except Exception, err:
+ except Exception as err:
self.unhandled_error = (err, traceback.format_exc())
class DatabaseException(Exception):
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 22f8af56c2..dfab13e3b8 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -3,7 +3,7 @@
''' Post installation script for linux '''
-import sys, os, cPickle, textwrap, stat
+import sys, os, cPickle, textwrap, stat, importlib
from subprocess import check_call
from calibre import __appname__, prints, guess_type
@@ -59,7 +59,7 @@
shutil.rmtree(x)
else:
os.unlink(x)
- except Exception, e:
+ except Exception as e:
print 'Failed to delete', x
print '\t', e
@@ -285,7 +285,7 @@ def setup_completion(self): # {{{
complete -o nospace -C calibre-complete ebook-convert
'''))
- except TypeError, err:
+ except TypeError as err:
if 'resolve_entities' in str(err):
print 'You need python-lxml >= 2.0.5 for calibre'
sys.exit(1)
@@ -309,7 +309,7 @@ def install_man_pages(self): # {{{
for src in entry_points['console_scripts']:
prog, right = src.split('=')
prog = prog.strip()
- module = __import__(right.split(':')[0].strip(), fromlist=['a'])
+ module = importlib.import_module(right.split(':')[0].strip())
parser = getattr(module, 'option_parser', None)
if parser is None:
continue
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 948611f775..97ef32e9d4 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -493,7 +493,16 @@ Most purchased EPUB books have `DRM `_. Thi
I am getting a "Permission Denied" error?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|. You can get permission denied errors if you are using an SD card with write protect enabled. Or if you, or some program you used changed the file permissions of the files in question to read only. Or if there is a filesystem error on the device which caused your operating system to mount the filesystem in read only mode or mark a particular file as read only pending recovery. Or if the files have their owner set to a user other than you. Or if your file is open in another program. You will need to fix the underlying cause of the permissions error before resuming to use |app|. Read the error message carefully, see what file it points to and fix the permissions on that file.
+A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|.
+
+ * You can get permission denied errors if you are using an SD card with write protect enabled.
+ * If you, or some program you used, changed the file permissions of the files in question to read only.
+ * If there is a filesystem error on the device which caused your operating system to mount the filesystem in read only mode or mark a particular file as read only pending recovery.
+ * If the files have their owner set to a user other than you.
+ * If your file is open in another program.
+ * If the file resides on a device, you may have reached the limit of a maximum of 256 files in the root of the device. In this case you need to reformat the device/sd card referred to in the error message with a FAT32 filesystem, or delete some files from the SD card/device memory.
+
+You will need to fix the underlying cause of the permissions error before you resume using |app|. Read the error message carefully, see what file it points to and fix the permissions on that file.
Can I have the comment metadata show up on my reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -510,7 +519,7 @@ You have two choices:
How is |app| licensed?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode `_. You are free to use the results of conversions from |app| however you want. You cannot use code, libraries from |app| in your software without maing your software open source. For details, see `The GNU GPL v3 http://www.gnu.org/licenses/gpl.html`_.
+|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode `_. You are free to use the results of conversions from |app| however you want. You cannot use code, libraries from |app| in your software without making your software open source. For details, see `The GNU GPL v3 `_.
How do I run calibre from my USB stick?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/calibre/manual/news.rst b/src/calibre/manual/news.rst
index d0838ccb0f..ed306a168e 100644
--- a/src/calibre/manual/news.rst
+++ b/src/calibre/manual/news.rst
@@ -137,7 +137,7 @@ to the recipe. Finally, lets replace some of the :term:`CSS` that we disabled ea
With these additions, our recipe has become "production quality", indeed it is very close to the actual recipe used by |app| for the *BBC*, shown below:
-.. literalinclude:: ../../../resources/recipes/bbc.recipe
+.. literalinclude:: ../../../recipes/bbc.recipe
This :term:`recipe` explores only the tip of the iceberg when it comes to the power of |app|. To explore more of the abilities of |app| we'll examine a more complex real life example in the next section.
diff --git a/src/calibre/manual/sub_groups.rst b/src/calibre/manual/sub_groups.rst
index c27b3581f8..e5a433dce9 100644
--- a/src/calibre/manual/sub_groups.rst
+++ b/src/calibre/manual/sub_groups.rst
@@ -105,8 +105,8 @@ After creating the saved search, you can use it as a restriction.
.. image:: images/sg_restrict2.jpg
:align: center
- Useful Template Functions
- -------------------------
+Useful Template Functions
+-------------------------
You might want to use the genre information in a template, such as with save to disk or send to device. The question might then be "How do I get the outermost genre name or names?" An |app| template function, subitems, is provided to make doing this easier.
@@ -114,4 +114,4 @@ After creating the saved search, you can use it as a restriction.
{#genre:subitems(0,1)||/}{title} - {authors}
-See :ref:`The |app| template language ` for more information templates and the subitem function.
\ No newline at end of file
+See :ref:`The |app| template language ` for more information on templates and the subitems function.
diff --git a/src/calibre/utils/Zeroconf.py b/src/calibre/utils/Zeroconf.py
index f4a7119d16..fbb9b4e71f 100755
--- a/src/calibre/utils/Zeroconf.py
+++ b/src/calibre/utils/Zeroconf.py
@@ -863,7 +863,7 @@ def run(self):
for socket in rr:
try:
self.readers[socket].handle_read()
- except NonLocalNameException, err:
+ except NonLocalNameException as err:
print err
except UnicodeDecodeError:
if DEBUG:
diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index 740e67bee8..2e40275beb 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -316,7 +316,7 @@ def safe_format(self, fmt, kwargs, error_value, book):
self.locals = {}
try:
ans = self.vformat(fmt, [], kwargs).strip()
- except Exception, e:
+ except Exception as e:
if DEBUG:
traceback.print_exc()
ans = error_value + ' ' + e.message
diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py
index e187235a9e..9594f64ae4 100644
--- a/src/calibre/utils/ipc/worker.py
+++ b/src/calibre/utils/ipc/worker.py
@@ -6,7 +6,7 @@
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, cPickle, sys
+import os, cPickle, sys, importlib
from multiprocessing.connection import Client
from threading import Thread
from Queue import Queue
@@ -75,7 +75,7 @@ def run(self):
def get_func(name):
module, func, notification = PARALLEL_FUNCS[name]
- module = __import__(module, fromlist=[1])
+ module = importlib.import_module(module)
func = getattr(module, func)
return func, notification
diff --git a/src/calibre/utils/lock.py b/src/calibre/utils/lock.py
index 5098c78f90..0b66be963b 100644
--- a/src/calibre/utils/lock.py
+++ b/src/calibre/utils/lock.py
@@ -32,7 +32,7 @@ def __init__(self, path, timeout=20):
None, #No template file
)
break
- except pywintypes.error, err:
+ except pywintypes.error as err:
if getattr(err, 'args', [-1])[0] in (0x20, 0x21):
time.sleep(1)
continue
diff --git a/src/calibre/utils/pdftk.py b/src/calibre/utils/pdftk.py
index 1263b60306..f4fcb8a2e3 100644
--- a/src/calibre/utils/pdftk.py
+++ b/src/calibre/utils/pdftk.py
@@ -56,7 +56,7 @@ def set_metadata(stream, mi):
try:
p.wait()
break
- except OSError, e:
+ except OSError as e:
if e.errno == errno.EINTR:
continue
else:
diff --git a/src/calibre/utils/smtp.py b/src/calibre/utils/smtp.py
index 744021f911..81936a8f71 100644
--- a/src/calibre/utils/smtp.py
+++ b/src/calibre/utils/smtp.py
@@ -76,7 +76,7 @@ def sendmail_direct(from_, to, msg, timeout, localhost, verbose,
s.connect(host, 25)
s.sendmail(from_, [to], msg)
return s.quit()
- except Exception, e:
+ except Exception as e:
last_error, last_traceback = e, traceback.format_exc()
if last_error is not None:
print last_traceback
diff --git a/src/calibre/web/feeds/feedparser.py b/src/calibre/web/feeds/feedparser.py
index ead9207b70..99c3e09666 100755
--- a/src/calibre/web/feeds/feedparser.py
+++ b/src/calibre/web/feeds/feedparser.py
@@ -6,12 +6,11 @@
Visit http://feedparser.org/ for the latest version
Visit http://feedparser.org/docs/ for the latest documentation
-Required: Python 2.1 or later
-Recommended: Python 2.3 or later
+Required: Python 2.4 or later
Recommended: CJKCodecs and iconv_codec
"""
-__version__ = "4.2-pre-" + "$Revision: 316 $"[11:14] + "-svn"
+__version__ = "5.0.1"
__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -42,14 +41,14 @@
"Kevin Marks ",
"Sam Ruby ",
"Ade Oshineye ",
- "Martin Pool "]
+ "Martin Pool ",
+ "Kurt McKee "]
_debug = 0
# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
# change this to your application name and URL.
-USER_AGENT = 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4' # Changed by Kovid
-
+USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11' # Changed by Kovid
# HTTP "Accept" header to send to servers when downloading feeds. If you don't
# want to send an Accept header, set this to None.
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"
@@ -76,12 +75,73 @@
# HTML content, set this to 1.
SANITIZE_HTML = 1
-# ---------- required modules (should come with any Python distribution) ----------
-import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
+# ---------- Python 3 modules (make it work if possible) ----------
try:
- from cStringIO import StringIO as _StringIO
+ import rfc822
+except ImportError:
+ from email import _parseaddr as rfc822
+
+try:
+ # Python 3.1 introduces bytes.maketrans and simultaneously
+ # deprecates string.maketrans; use bytes.maketrans if possible
+ _maketrans = bytes.maketrans
+except (NameError, AttributeError):
+ import string
+ _maketrans = string.maketrans
+
+# base64 support for Atom feeds that contain embedded binary data
+try:
+ import base64, binascii
+ # Python 3.1 deprecates decodestring in favor of decodebytes
+ _base64decode = getattr(base64, 'decodebytes', base64.decodestring)
except:
- from StringIO import StringIO as _StringIO
+ base64 = binascii = None
+
+def _s2bytes(s):
+ # Convert a UTF-8 str to bytes if the interpreter is Python 3
+ try:
+ return bytes(s, 'utf8')
+ except (NameError, TypeError):
+ # In Python 2.5 and below, bytes doesn't exist (NameError)
+ # In Python 2.6 and above, bytes and str are the same (TypeError)
+ return s
+
+def _l2bytes(l):
+ # Convert a list of ints to bytes if the interpreter is Python 3
+ try:
+ if bytes is not str:
+ # In Python 2.6 and above, this call won't raise an exception
+ # but it will return bytes([65]) as '[65]' instead of 'A'
+ return bytes(l)
+ raise NameError
+ except NameError:
+ return ''.join(map(chr, l))
+
+# If you want feedparser to allow all URL schemes, set this to ()
+# List culled from Python's urlparse documentation at:
+# http://docs.python.org/library/urlparse.html
+# as well as from "URI scheme" at Wikipedia:
+# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
+# Many more will likely need to be added!
+ACCEPTABLE_URI_SCHEMES = (
+ 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto',
+ 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp',
+ 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais',
+ # Additional common-but-unofficial schemes
+ 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs',
+ 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg',
+)
+#ACCEPTABLE_URI_SCHEMES = ()
+
+# ---------- required modules (should come with any Python distribution) ----------
+import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime
+try:
+ from io import BytesIO as _StringIO
+except ImportError:
+ try:
+ from cStringIO import StringIO as _StringIO
+ except:
+ from StringIO import StringIO as _StringIO
# ---------- optional modules (feedparser will work without these, but with reduced functionality) ----------
@@ -114,12 +174,6 @@ def _xmlescape(data,entities={}):
data = data.replace(char, entity)
return data
-# base64 support for Atom feeds that contain embedded binary data
-try:
- import base64, binascii
-except:
- base64 = binascii = None
-
# cjkcodecs and iconv_codec provide support for more character encodings.
# Both are available from http://cjkpython.i18n.org/
try:
@@ -172,17 +226,27 @@ class UndeclaredNamespace(Exception): pass
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''')
+ class EndBracketRegEx:
+ def __init__(self):
+ # Overriding the built-in sgmllib.endbracket regex allows the
+ # parser to find angle brackets embedded in element attributes.
+ self.endbracket = re.compile('''([^'"<>]|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''')
def search(self,string,index=0):
- self.match = self.endbracket.match(string,index)
- if self.match: return self
- def start(self,n):
+ match = self.endbracket.match(string,index)
+ if match is not None:
+ # Returning a new object in the calling thread's context
+ # resolves a thread-safety issue.
+ return EndBracketMatch(match)
+ return None
+ class EndBracketMatch:
+ def __init__(self, match):
+ self.match = match
+ def start(self, n):
return self.match.end(n)
- sgmllib.endbracket = EndBracketMatch()
+ sgmllib.endbracket = EndBracketRegEx()
SUPPORTED_VERSIONS = {'': 'unknown',
'rss090': 'RSS 0.90',
@@ -220,7 +284,7 @@ class FeedParserDict(UserDict):
'guid': 'id',
'date': 'updated',
'date_parsed': 'updated_parsed',
- 'description': ['subtitle', 'summary'],
+ 'description': ['summary', 'subtitle'],
'url': ['href'],
'modified': 'updated',
'modified_parsed': 'updated_parsed',
@@ -245,9 +309,9 @@ def __getitem__(self, key):
realkey = self.keymap.get(key, key)
if type(realkey) == types.ListType:
for k in realkey:
- if UserDict.has_key(self, k):
+ if UserDict.__contains__(self, k):
return UserDict.__getitem__(self, k)
- if UserDict.has_key(self, key):
+ if UserDict.__contains__(self, key):
return UserDict.__getitem__(self, key)
return UserDict.__getitem__(self, realkey)
@@ -272,9 +336,12 @@ def setdefault(self, key, value):
def has_key(self, key):
try:
- return hasattr(self, key) or UserDict.has_key(self, key)
+ return hasattr(self, key) or UserDict.__contains__(self, key)
except AttributeError:
return False
+ # This alias prevents the 2to3 tool from changing the semantics of the
+ # __contains__ function below and exhausting the maximum recursion depth
+ __has_key = has_key
def __getattr__(self, key):
try:
@@ -294,7 +361,7 @@ def __setattr__(self, key, value):
return self.__setitem__(key, value)
def __contains__(self, key):
- return self.has_key(key)
+ return self.__has_key(key)
def zopeCompatibilityHack():
global FeedParserDict
@@ -327,9 +394,8 @@ def _ebcdic_to_ascii(s):
92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249,
48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255
)
- import string
- _ebcdic_to_ascii_map = string.maketrans( \
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ _ebcdic_to_ascii_map = _maketrans( \
+ _l2bytes(range(256)), _l2bytes(emap))
return s.translate(_ebcdic_to_ascii_map)
_cp1252 = {
@@ -483,6 +549,10 @@ def unknown_starttag(self, tag, attrs):
# normalize attrs
attrs = [(k.lower(), v) for k, v in attrs]
attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
+ # the sgml parser doesn't handle entities in attributes, but
+ # strict xml parsers do -- account for this difference
+ if isinstance(self, _LooseFeedParser):
+ attrs = [(k, v.replace('&', '&')) for k, v in attrs]
# track xml:base and xml:lang
attrsD = dict(attrs)
@@ -492,7 +562,12 @@ def unknown_starttag(self, tag, attrs):
baseuri = unicode(baseuri, self.encoding)
except:
baseuri = unicode(baseuri, 'iso-8859-1')
- self.baseuri = _urljoin(self.baseuri, baseuri)
+ # ensure that self.baseuri is always an absolute URI that
+ # uses a whitelisted URI scheme (e.g. not `javascript:`)
+ if self.baseuri:
+ self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri
+ else:
+ self.baseuri = _urljoin(self.baseuri, baseuri)
lang = attrsD.get('xml:lang', attrsD.get('lang'))
if lang == '':
# xml:lang could be explicitly set to '', we need to capture that
@@ -671,7 +746,7 @@ def parse_declaration(self, i):
def mapContentType(self, contentType):
contentType = contentType.lower()
- if contentType == 'text':
+ if contentType == 'text' or contentType == 'plain':
contentType = 'text/plain'
elif contentType == 'html':
contentType = 'text/html'
@@ -735,6 +810,11 @@ def pop(self, element, stripWhitespace=1):
else:
pieces = pieces[1:-1]
+ # Ensure each piece is a str for Python 3
+ for (i, v) in enumerate(pieces):
+ if not isinstance(v, basestring):
+ pieces[i] = v.decode('utf-8')
+
output = ''.join(pieces)
if stripWhitespace:
output = output.strip()
@@ -743,11 +823,15 @@ def pop(self, element, stripWhitespace=1):
# decode base64 content
if base64 and self.contentparams.get('base64', 0):
try:
- output = base64.decodestring(output)
+ output = _base64decode(output)
except binascii.Error:
pass
except binascii.Incomplete:
pass
+ except TypeError:
+ # In Python 3, base64 takes and outputs bytes, not str
+ # This may not be the most correct way to accomplish this
+ output = _base64decode(output.encode('utf-8')).decode('utf-8')
# resolve relative URIs
if (element in self.can_be_relative_uri) and output:
@@ -805,7 +889,7 @@ def pop(self, element, stripWhitespace=1):
# address common error where people take data that is already
# utf-8, presume that it is iso-8859-1, and re-encode it.
- if self.encoding=='utf-8' and type(output) == type(u''):
+ if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''):
try:
output = unicode(output.encode('iso-8859-1'), 'utf-8')
except:
@@ -830,9 +914,14 @@ def pop(self, element, stripWhitespace=1):
contentparams['value'] = output
self.entries[-1][element].append(contentparams)
elif element == 'link':
- self.entries[-1][element] = output
- if output:
- self.entries[-1]['links'][-1]['href'] = output
+ if not self.inimage:
+ # query variables in urls in link elements are improperly
+ # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're
+ # unhandled character references. fix this special case.
+ output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output)
+ self.entries[-1][element] = output
+ if output:
+ self.entries[-1]['links'][-1]['href'] = output
else:
if element == 'description':
element = 'summary'
@@ -847,6 +936,9 @@ def pop(self, element, stripWhitespace=1):
element = 'subtitle'
context[element] = output
if element == 'link':
+ # fix query variables; see above for the explanation
+ output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output)
+ context[element] = output
context['links'][-1]['href'] = output
elif self.incontent:
contentparams = copy.deepcopy(self.contentparams)
@@ -874,21 +966,21 @@ def popContent(self, tag):
# text, but this is routinely ignored. This is an attempt to detect
# the most common cases. As false positives often result in silent
# data loss, this function errs on the conservative side.
- def lookslikehtml(self, str):
+ def lookslikehtml(self, s):
if self.version.startswith('atom'): return
if self.contentparams.get('type','text/html') != 'text/plain': return
# must have a close tag or a entity reference to qualify
- if not (re.search(r'(\w+)>',str) or re.search("?\w+;",str)): return
+ if not (re.search(r'(\w+)>',s) or re.search("?\w+;",s)): return
# all tags must be in a restricted subset of valid HTML tags
if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements,
- re.findall(r'?(\w+)',str)): return
+ re.findall(r'?(\w+)',s)): return
# all entities must have been defined as valid HTML entities
from htmlentitydefs import entitydefs
if filter(lambda e: e not in entitydefs.keys(),
- re.findall(r'&(\w+);',str)): return
+ re.findall(r'&(\w+);',s)): return
return 1
@@ -929,9 +1021,12 @@ def _itsAnHrefDamnIt(self, attrsD):
attrsD['href'] = href
return attrsD
- def _save(self, key, value):
+ def _save(self, key, value, overwrite=False):
context = self._getContext()
- context.setdefault(key, value)
+ if overwrite:
+ context[key] = value
+ else:
+ context.setdefault(key, value)
def _start_rss(self, attrsD):
versionmap = {'0.91': 'rss091u',
@@ -988,7 +1083,8 @@ def _end_channel(self):
def _start_image(self, attrsD):
context = self._getContext()
- context.setdefault('image', FeedParserDict())
+ if not self.inentry:
+ context.setdefault('image', FeedParserDict())
self.inimage = 1
self.hasTitle = 0
self.push('image', 0)
@@ -1013,6 +1109,10 @@ def _end_textinput(self):
def _start_author(self, attrsD):
self.inauthor = 1
self.push('author', 1)
+ # Append a new FeedParserDict when expecting an author
+ context = self._getContext()
+ context.setdefault('authors', [])
+ context['authors'].append(FeedParserDict())
_start_managingeditor = _start_author
_start_dc_author = _start_author
_start_dc_creator = _start_author
@@ -1147,6 +1247,8 @@ def _save_author(self, key, value, prefix='author'):
context.setdefault(prefix + '_detail', FeedParserDict())
context[prefix + '_detail'][key] = value
self._sync_author_detail()
+ context.setdefault('authors', [FeedParserDict()])
+ context['authors'][-1][key] = value
def _save_contributor(self, key, value):
context = self._getContext()
@@ -1252,7 +1354,7 @@ def _start_published(self, attrsD):
def _end_published(self):
value = self.pop('published')
- self._save('published_parsed', _parse_date(value))
+ self._save('published_parsed', _parse_date(value), overwrite=True)
_end_dcterms_issued = _end_published
_end_issued = _end_published
@@ -1262,15 +1364,17 @@ def _start_updated(self, attrsD):
_start_dcterms_modified = _start_updated
_start_pubdate = _start_updated
_start_dc_date = _start_updated
+ _start_lastbuilddate = _start_updated
def _end_updated(self):
value = self.pop('updated')
parsed_value = _parse_date(value)
- self._save('updated_parsed', parsed_value)
+ self._save('updated_parsed', parsed_value, overwrite=True)
_end_modified = _end_updated
_end_dcterms_modified = _end_updated
_end_pubdate = _end_updated
_end_dc_date = _end_updated
+ _end_lastbuilddate = _end_updated
def _start_created(self, attrsD):
self.push('created', 1)
@@ -1278,14 +1382,14 @@ def _start_created(self, attrsD):
def _end_created(self):
value = self.pop('created')
- self._save('created_parsed', _parse_date(value))
+ self._save('created_parsed', _parse_date(value), overwrite=True)
_end_dcterms_created = _end_created
def _start_expirationdate(self, attrsD):
self.push('expired', 1)
def _end_expirationdate(self):
- self._save('expired_parsed', _parse_date(self.pop('expired')))
+ self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True)
def _start_cc_license(self, attrsD):
context = self._getContext()
@@ -1334,6 +1438,10 @@ def _start_category(self, attrsD):
_start_dc_subject = _start_category
_start_keywords = _start_category
+ def _start_media_category(self, attrsD):
+ attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
+ self._start_category(attrsD)
+
def _end_itunes_keywords(self):
for term in self.pop('itunes_keywords').split():
self._addTag(term, 'http://www.itunes.com/', None)
@@ -1354,6 +1462,7 @@ def _end_category(self):
_end_dc_subject = _end_category
_end_keywords = _end_category
_end_itunes_category = _end_category
+ _end_media_category = _end_category
def _start_cloud(self, attrsD):
self._getContext()['cloud'] = FeedParserDict(attrsD)
@@ -1368,11 +1477,10 @@ def _start_link(self, attrsD):
attrsD = self._itsAnHrefDamnIt(attrsD)
if attrsD.has_key('href'):
attrsD['href'] = self.resolveURI(attrsD['href'])
- if attrsD.get('rel')=='enclosure' and not context.get('id'):
- context['id'] = attrsD.get('href')
expectingText = self.infeed or self.inentry or self.insource
context.setdefault('links', [])
- context['links'].append(FeedParserDict(attrsD))
+ if not (self.inentry and self.inimage):
+ context['links'].append(FeedParserDict(attrsD))
if attrsD.has_key('href'):
expectingText = 0
if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
@@ -1498,9 +1606,6 @@ def _start_enclosure(self, attrsD):
context = self._getContext()
attrsD['rel']='enclosure'
context.setdefault('links', []).append(FeedParserDict(attrsD))
- href = attrsD.get('href')
- if href and not context.get('id'):
- context['id'] = href
def _start_source(self, attrsD):
if 'url' in attrsD:
@@ -1537,10 +1642,10 @@ def _start_content_encoded(self, attrsD):
_start_fullitem = _start_content_encoded
def _end_content(self):
- copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
+ copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
value = self.popContent('content')
- if copyToDescription:
- self._save('description', value)
+ if copyToSummary:
+ self._save('summary', value)
_end_body = _end_content
_end_xhtml_body = _end_content
@@ -1550,7 +1655,8 @@ def _end_content(self):
def _start_itunes_image(self, attrsD):
self.push('itunes_image', 0)
- self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
+ if attrsD.get('href'):
+ self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
_start_itunes_link = _start_itunes_image
def _end_itunes_block(self):
@@ -1559,7 +1665,10 @@ def _end_itunes_block(self):
def _end_itunes_explicit(self):
value = self.pop('itunes_explicit', 0)
- self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0
+ # Convert 'yes' -> True, 'clean' to False, and any other value to None
+ # False and None both evaluate as False, so the difference can be ignored
+ # by applications that only need to know if the content is explicit.
+ self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0]
def _start_media_content(self, attrsD):
context = self._getContext()
@@ -1588,6 +1697,17 @@ def _end_media_player(self):
context = self._getContext()
context['media_player']['content'] = value
+ def _start_newlocation(self, attrsD):
+ self.push('newlocation', 1)
+
+ def _end_newlocation(self):
+ url = self.pop('newlocation')
+ context = self._getContext()
+ # don't set newlocation if the context isn't right
+ if context is not self.feeddata:
+ return
+ context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip())
+
if _XML_AVAILABLE:
class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler):
def __init__(self, baseuri, baselang, encoding):
@@ -1689,9 +1809,9 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
'source', 'track', 'wbr'
]
- def __init__(self, encoding, type):
+ def __init__(self, encoding, _type):
self.encoding = encoding
- self.type = type
+ self._type = _type
if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
sgmllib.SGMLParser.__init__(self)
@@ -1708,7 +1828,7 @@ def _shorttag_replace(self, match):
def parse_starttag(self,i):
j=sgmllib.SGMLParser.parse_starttag(self, i)
- if self.type == 'application/xhtml+xml':
+ if self._type == 'application/xhtml+xml':
if j>2 and self.rawdata[j-2:j]=='/>':
self.unknown_endtag(self.lasttag)
return j
@@ -1719,8 +1839,14 @@ def feed(self, data):
data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
data = data.replace(''', "'")
data = data.replace('"', '"')
- if self.encoding and type(data) == type(u''):
- data = data.encode(self.encoding)
+ try:
+ bytes
+ if bytes is str:
+ raise NameError
+ self.encoding = self.encoding + '_INVALID_PYTHON_3'
+ except NameError:
+ if self.encoding and type(data) == type(u''):
+ data = data.encode(self.encoding)
sgmllib.SGMLParser.feed(self, data)
sgmllib.SGMLParser.close(self)
@@ -1749,7 +1875,11 @@ def unknown_starttag(self, tag, attrs):
value = unicode(value, self.encoding)
except:
value = unicode(value, 'iso-8859-1')
- uattrs.append((unicode(key, self.encoding), value))
+ try:
+ # Currently, in Python 3 the key is already a str, and cannot be decoded again
+ uattrs.append((unicode(key, self.encoding), value))
+ except TypeError:
+ uattrs.append((key, value))
strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
if self.encoding:
try:
@@ -1840,6 +1970,14 @@ def output(self):
'''Return processed HTML as a single string'''
return ''.join([str(p) for p in self.pieces])
+ def parse_declaration(self, i):
+ try:
+ return sgmllib.SGMLParser.parse_declaration(self, i)
+ except sgmllib.SGMLParseError:
+ # escape the doctype declaration and continue parsing
+ self.handle_data('<')
+ return i+1
+
class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
def __init__(self, baseuri, baselang, encoding, entities):
sgmllib.SGMLParser.__init__(self)
@@ -2019,10 +2157,10 @@ def findVCards(self, elmRoot, bAgentParsing=0):
arLines = []
def processSingleString(sProperty):
- sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1)
+ sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding)
if sValue:
arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue))
- return sValue or ''
+ return sValue or u''
def processSingleURI(sProperty):
sValue = self.getPropertyValue(elmCard, sProperty, self.URI)
@@ -2071,8 +2209,8 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None):
sAgentValue = sAgentValue.replace(';', '\\;')
if sAgentValue:
arLines.append(self.vcardFold('AGENT:' + sAgentValue))
- elmAgent['class'] = ''
- elmAgent.contents = []
+ # Completely remove the agent element from the parse tree
+ elmAgent.extract()
else:
sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1);
if sAgentValue:
@@ -2219,8 +2357,8 @@ def processTypeValue(sProperty, arDefaultType, arForceType=None):
processSingleURI('key')
if arLines:
- arLines = ['BEGIN:vCard','VERSION:3.0'] + arLines + ['END:vCard']
- sVCards += '\n'.join(arLines) + '\n'
+ arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard']
+ sVCards += u'\n'.join(arLines) + u'\n'
return sVCards.strip()
@@ -2277,7 +2415,12 @@ def findXFN(self):
def _parseMicroformats(htmlSource, baseURI, encoding):
if not BeautifulSoup: return
if _debug: sys.stderr.write('entering _parseMicroformats\n')
- p = _MicroformatsParser(htmlSource, baseURI, encoding)
+ try:
+ p = _MicroformatsParser(htmlSource, baseURI, encoding)
+ except UnicodeEncodeError:
+ # sgmllib throws this exception when performing lookups of tags
+ # with non-ASCII characters in them.
+ return
p.vcard = p.findVCards(p.document)
p.findTags()
p.findEnclosures()
@@ -2311,12 +2454,12 @@ class _RelativeURIResolver(_BaseHTMLProcessor):
('q', 'cite'),
('script', 'src')]
- def __init__(self, baseuri, encoding, type):
- _BaseHTMLProcessor.__init__(self, encoding, type)
+ def __init__(self, baseuri, encoding, _type):
+ _BaseHTMLProcessor.__init__(self, encoding, _type)
self.baseuri = baseuri
def resolveURI(self, uri):
- return _urljoin(self.baseuri, uri.strip())
+ return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip()))
def unknown_starttag(self, tag, attrs):
if _debug:
@@ -2325,27 +2468,44 @@ def unknown_starttag(self, tag, attrs):
attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs]
_BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
-def _resolveRelativeURIs(htmlSource, baseURI, encoding, type):
+def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type):
if _debug:
sys.stderr.write('entering _resolveRelativeURIs\n')
- p = _RelativeURIResolver(baseURI, encoding, type)
+ p = _RelativeURIResolver(baseURI, encoding, _type)
p.feed(htmlSource)
return p.output()
+def _makeSafeAbsoluteURI(base, rel=None):
+ # bail if ACCEPTABLE_URI_SCHEMES is empty
+ if not ACCEPTABLE_URI_SCHEMES:
+ return _urljoin(base, rel or u'')
+ if not base:
+ return rel or u''
+ if not rel:
+ scheme = urlparse.urlparse(base)[0]
+ if not scheme or scheme in ACCEPTABLE_URI_SCHEMES:
+ return base
+ return u''
+ uri = _urljoin(base, rel)
+ if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES:
+ return u''
+ return uri
+
class _HTMLSanitizer(_BaseHTMLProcessor):
- acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'article',
- 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
- 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command',
- 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir',
- 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', 'figure', 'footer',
- 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
- 'img', 'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map',
- 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup',
- 'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
- 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', 'sub',
- 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead',
- 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript']
+ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
+ 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
+ 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+ 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
+ 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
+ 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
+ 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
+ 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
+ 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
+ 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
+ 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
+ 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
+ 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
@@ -2469,7 +2629,7 @@ def unknown_starttag(self, tag, attrs):
self.unacceptablestack += 1
# add implicit namespaces to html5 inline svg/mathml
- if self.type.endswith('html'):
+ if self._type.endswith('html'):
if not dict(attrs).get('xmlns'):
if tag=='svg':
attrs.append( ('xmlns','http://www.w3.org/2000/svg') )
@@ -2514,6 +2674,9 @@ def unknown_starttag(self, tag, attrs):
for key, value in self.normalize_attrs(attrs):
if key in acceptable_attributes:
key=keymap.get(key,key)
+ # make sure the uri uses an acceptable uri scheme
+ if key == u'href':
+ value = _makeSafeAbsoluteURI(value)
clean_attrs.append((key,value))
elif key=='style':
clean_value = self.sanitize_style(value)
@@ -2569,9 +2732,22 @@ def sanitize_style(self, style):
return ' '.join(clean)
+ def parse_comment(self, i, report=1):
+ ret = _BaseHTMLProcessor.parse_comment(self, i, report)
+ if ret >= 0:
+ return ret
+ # if ret == -1, this may be a malicious attempt to circumvent
+ # sanitization, or a page-destroying unclosed comment
+ match = re.compile(r'--[^>]*>').search(self.rawdata, i+4)
+ if match:
+ return match.end()
+ # unclosed comment; deliberately fail to handle_data()
+ return len(self.rawdata)
-def _sanitizeHTML(htmlSource, encoding, type):
- p = _HTMLSanitizer(encoding, type)
+
+def _sanitizeHTML(htmlSource, encoding, _type):
+ p = _HTMLSanitizer(encoding, _type)
+ htmlSource = htmlSource.replace('= '2.3.3'
assert base64 != None
- user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':')
+ user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':')
realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
self.add_password(realm, host, user, passw)
retry = self.http_error_auth_reqed('www-authenticate', host, req, headers)
@@ -2663,7 +2839,7 @@ def http_error_401(self, req, fp, code, msg, headers):
except:
return self.http_error_default(req, fp, code, msg, headers)
-def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, extra_headers):
+def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers):
"""URL, filename, or string --> stream
This function lets you define parsers that take any input source
@@ -2691,7 +2867,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
If handlers is supplied, it is a list of handlers used to build a
urllib2 opener.
- if extra_headers is supplied it is a dictionary of HTTP request headers
+ if request_headers is supplied it is a dictionary of HTTP request headers
that will override the values generated by FeedParser.
"""
@@ -2701,7 +2877,12 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
if url_file_stream_or_string == '-':
return sys.stdin
- if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp'):
+ if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'):
+ # Deal with the feed URI scheme
+ if url_file_stream_or_string.startswith('feed:http'):
+ url_file_stream_or_string = url_file_stream_or_string[5:]
+ elif url_file_stream_or_string.startswith('feed:'):
+ url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:]
if not agent:
agent = USER_AGENT
# test for inline user:password for basic auth
@@ -2713,20 +2894,20 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
user_passwd, realhost = urllib.splituser(realhost)
if user_passwd:
url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest)
- auth = base64.encodestring(user_passwd).strip()
+ auth = base64.standard_b64encode(user_passwd).strip()
# iri support
try:
if isinstance(url_file_stream_or_string,unicode):
- url_file_stream_or_string = url_file_stream_or_string.encode('idna')
+ url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8')
else:
- url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna')
+ url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8')
except:
pass
# try to open with urllib2 (to use optional headers)
- request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, extra_headers)
- opener = apply(urllib2.build_opener, tuple([_FeedURLHandler()] + handlers))
+ request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers)
+ opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()]))
opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent
try:
return opener.open(request)
@@ -2735,20 +2916,22 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
# try to open with native open function (if url_file_stream_or_string is a filename)
try:
- return open(url_file_stream_or_string)
+ return open(url_file_stream_or_string, 'rb')
except:
pass
# treat url_file_stream_or_string as string
return _StringIO(str(url_file_stream_or_string))
-def _build_urllib2_request(url, agent, etag, modified, referrer, auth, extra_headers):
+def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_headers):
request = urllib2.Request(url)
request.add_header('User-Agent', agent)
if etag:
request.add_header('If-None-Match', etag)
if type(modified) == type(''):
modified = _parse_date(modified)
+ elif isinstance(modified, datetime.datetime):
+ modified = modified.utctimetuple()
if modified:
# format into an RFC 1123-compliant timestamp. We can't use
# time.strftime() since the %a and %b directives can be affected
@@ -2773,7 +2956,7 @@ def _build_urllib2_request(url, agent, etag, modified, referrer, auth, extra_hea
request.add_header('Accept', ACCEPT_HEADER)
# use this for whatever -- cookies, special headers, etc
# [('Cookie','Something'),('x-special-header','Another Value')]
- for header_name, header_value in extra_headers.items():
+ for header_name, header_value in request_headers.items():
request.add_header(header_name, header_value)
request.add_header('A-IM', 'feed') # RFC 3229 support
return request
@@ -2811,9 +2994,15 @@ def registerDateHandler(func):
+ r'(\.(?P\d+))?'
+ r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?'
for tmpl in _iso8601_tmpl]
-del tmpl
+try:
+ del tmpl
+except NameError:
+ pass
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
-del regex
+try:
+ del regex
+except NameError:
+ pass
def _parse_date_iso8601(dateString):
'''Parse a variety of ISO-8601-compatible formats like 20040105'''
m = None
@@ -2887,7 +3076,7 @@ def _parse_date_iso8601(dateString):
# Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
# which is guaranteed to normalize d/m/y/h/m/s.
# Many implementations have bugs, but we'll pretend they don't.
- return time.localtime(time.mktime(tm))
+ return time.localtime(time.mktime(tuple(tm)))
registerDateHandler(_parse_date_iso8601)
# 8-bit date handling routines written by ytrewq1.
@@ -3128,12 +3317,12 @@ def __extract_tzd(m):
__date_re = ('(?P\d\d\d\d)'
'(?:(?P-|)'
- '(?:(?P\d\d\d)'
- '|(?P\d\d)(?:(?P=dsep)(?P\d\d))?))?')
+ '(?:(?P\d\d)(?:(?P=dsep)(?P\d\d))?'
+ '|(?P\d\d\d)))?')
__tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)'
__tzd_rx = re.compile(__tzd_re)
__time_re = ('(?P\d\d)(?P:|)(?P\d\d)'
- '(?:(?P=tsep)(?P\d\d(?:[.,]\d+)?))?'
+ '(?:(?P=tsep)(?P\d\d)(?:[.,]\d+)?)?'
+ __tzd_re)
__datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
__datetime_rx = re.compile(__datetime_re)
@@ -3157,6 +3346,10 @@ def _parse_date_rfc822(dateString):
else:
data.append('')
dateString = " ".join(data)
+ # Account for the Etc/GMT timezone by stripping 'Etc/'
+ elif len(data) == 5 and data[4].lower().startswith('etc/'):
+ data[4] = data[4][4:]
+ dateString = " ".join(data)
if len(data) < 5:
dateString += ' 00:00:00 GMT'
tm = rfc822.parsedate_tz(dateString)
@@ -3194,7 +3387,7 @@ def _parse_date(dateString):
raise ValueError
map(int, date9tuple)
return date9tuple
- except Exception, e:
+ except Exception as e:
if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e)))
pass
return None
@@ -3261,59 +3454,59 @@ def _parseHTTPContentType(content_type):
sniffed_xml_encoding = ''
xml_encoding = ''
true_encoding = ''
- http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type'))
+ http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type')))
# Must sniff for non-ASCII-compatible character encodings before
# searching for XML declaration. This heuristic is defined in
# section F of the XML specification:
# http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
try:
- if xml_data[:4] == '\x4c\x6f\xa7\x94':
+ if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]):
# EBCDIC
xml_data = _ebcdic_to_ascii(xml_data)
- elif xml_data[:4] == '\x00\x3c\x00\x3f':
+ elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]):
# UTF-16BE
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'):
+ elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])):
# UTF-16BE with BOM
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
- elif xml_data[:4] == '\x3c\x00\x3f\x00':
+ elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]):
# UTF-16LE
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and (xml_data[2:4] != '\x00\x00'):
+ elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])):
# UTF-16LE with BOM
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
- elif xml_data[:4] == '\x00\x00\x00\x3c':
+ elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]):
# UTF-32BE
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
- elif xml_data[:4] == '\x3c\x00\x00\x00':
+ elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]):
# UTF-32LE
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
- elif xml_data[:4] == '\x00\x00\xfe\xff':
+ elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]):
# UTF-32BE with BOM
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
- elif xml_data[:4] == '\xff\xfe\x00\x00':
+ elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]):
# UTF-32LE with BOM
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
- elif xml_data[:3] == '\xef\xbb\xbf':
+ elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]):
# UTF-8 with BOM
sniffed_xml_encoding = 'utf-8'
xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
else:
# ASCII-compatible
pass
- xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
+ xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data)
except:
xml_encoding_match = None
if xml_encoding_match:
- xml_encoding = xml_encoding_match.groups()[0].lower()
+ xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower()
if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')):
xml_encoding = sniffed_xml_encoding
acceptable_content_type = 0
@@ -3329,7 +3522,7 @@ def _parseHTTPContentType(content_type):
true_encoding = http_encoding or 'us-ascii'
elif http_content_type.startswith('text/'):
true_encoding = http_encoding or 'us-ascii'
- elif http_headers and (not http_headers.has_key('content-type')):
+ elif http_headers and (not (http_headers.has_key('content-type') or http_headers.has_key('Content-type'))):
true_encoding = xml_encoding or 'iso-8859-1'
else:
true_encoding = xml_encoding or 'utf-8'
@@ -3347,35 +3540,35 @@ def _toUTF8(data, encoding):
'''
if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding)
# strip Byte Order Mark (if present)
- if (len(data) >= 4) and (data[:2] == '\xfe\xff') and (data[2:4] != '\x00\x00'):
+ if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-16be':
sys.stderr.write('trying utf-16be instead\n')
encoding = 'utf-16be'
data = data[2:]
- elif (len(data) >= 4) and (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'):
+ elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-16le':
sys.stderr.write('trying utf-16le instead\n')
encoding = 'utf-16le'
data = data[2:]
- elif data[:3] == '\xef\xbb\xbf':
+ elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-8':
sys.stderr.write('trying utf-8 instead\n')
encoding = 'utf-8'
data = data[3:]
- elif data[:4] == '\x00\x00\xfe\xff':
+ elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-32be':
sys.stderr.write('trying utf-32be instead\n')
encoding = 'utf-32be'
data = data[4:]
- elif data[:4] == '\xff\xfe\x00\x00':
+ elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]):
if _debug:
sys.stderr.write('stripping BOM\n')
if encoding != 'utf-32le':
@@ -3398,36 +3591,36 @@ def _stripDoctype(data):
rss_version may be 'rss091n' or None
stripped_data is the same XML document, minus the DOCTYPE
'''
- start = re.search('<\w',data)
+ start = re.search(_s2bytes('<\w'), data)
start = start and start.start() or -1
head,data = data[:start+1], data[start+1:]
- entity_pattern = re.compile(r'^\s*]*?)>', re.MULTILINE)
+ entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE)
entity_results=entity_pattern.findall(head)
- head = entity_pattern.sub('', head)
- doctype_pattern = re.compile(r'^\s*]*?)>', re.MULTILINE)
+ head = entity_pattern.sub(_s2bytes(''), head)
+ doctype_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE)
doctype_results = doctype_pattern.findall(head)
- doctype = doctype_results and doctype_results[0] or ''
- if doctype.lower().count('netscape'):
+ doctype = doctype_results and doctype_results[0] or _s2bytes('')
+ if doctype.lower().count(_s2bytes('netscape')):
version = 'rss091n'
else:
version = None
# only allow in 'safe' inline entity definitions
- replacement=''
+ replacement=_s2bytes('')
if len(doctype_results)==1 and entity_results:
- safe_pattern=re.compile('\s+(\w+)\s+"(\w+;|[^&"]*)"')
+ safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(\w+;|[^&"]*)"'))
safe_entities=filter(lambda e: safe_pattern.match(e),entity_results)
if safe_entities:
- replacement='\n]>' % '>\n \n \n]>')
data = doctype_pattern.sub(replacement, head) + data
- return version, data, dict(replacement and safe_pattern.findall(replacement))
+ return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)])
-def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], extra_headers={}):
+def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}):
'''Parse a feed from a URL, file, stream, or string.
- extra_headers, if given, is a dict from http header name to value to add
+ request_headers, if given, is a dict from http header name to value to add
to the request; this overrides internally generated values.
'''
result = FeedParserDict()
@@ -3435,23 +3628,31 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['entries'] = []
if _XML_AVAILABLE:
result['bozo'] = 0
- if type(handlers) == types.InstanceType:
+ if not isinstance(handlers, list):
handlers = [handlers]
try:
- f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, extra_headers)
+ f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers)
data = f.read()
- except Exception, e:
+ except Exception as e:
result['bozo'] = 1
result['bozo_exception'] = e
data = None
f = None
+ if hasattr(f, 'headers'):
+ result['headers'] = dict(f.headers)
+ # overwrite existing headers using response_headers
+ if 'headers' in result:
+ result['headers'].update(response_headers)
+ elif response_headers:
+ result['headers'] = copy.deepcopy(response_headers)
+
# if feed is gzip-compressed, decompress it
- if f and data and hasattr(f, 'headers'):
- if gzip and f.headers.get('content-encoding', '') == 'gzip':
+ if f and data and 'headers' in result:
+ if gzip and result['headers'].get('content-encoding') == 'gzip':
try:
data = gzip.GzipFile(fileobj=_StringIO(data)).read()
- except Exception, e:
+ except Exception as e:
# Some feeds claim to be gzipped but they're not, so
# we get garbage. Ideally, we should re-request the
# feed without the 'Accept-encoding: gzip' header,
@@ -3459,30 +3660,29 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['bozo'] = 1
result['bozo_exception'] = e
data = ''
- elif zlib and f.headers.get('content-encoding', '') == 'deflate':
+ elif zlib and result['headers'].get('content-encoding') == 'deflate':
try:
data = zlib.decompress(data, -zlib.MAX_WBITS)
- except Exception, e:
+ except Exception as e:
result['bozo'] = 1
result['bozo_exception'] = e
data = ''
# save HTTP headers
- if hasattr(f, 'info'):
- info = f.info()
- etag = info.getheader('ETag')
- if etag:
- result['etag'] = etag
- last_modified = info.getheader('Last-Modified')
- if last_modified:
- result['modified'] = _parse_date(last_modified)
+ if 'headers' in result:
+ if 'etag' in result['headers'] or 'ETag' in result['headers']:
+ etag = result['headers'].get('etag', result['headers'].get('ETag'))
+ if etag:
+ result['etag'] = etag
+ if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']:
+ modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified'))
+ if modified:
+ result['modified'] = _parse_date(modified)
if hasattr(f, 'url'):
result['href'] = f.url
result['status'] = 200
if hasattr(f, 'status'):
result['status'] = f.status
- if hasattr(f, 'headers'):
- result['headers'] = f.headers.dict
if hasattr(f, 'close'):
f.close()
@@ -3495,8 +3695,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type = \
_getCharacterEncoding(http_headers, data)
if http_headers and (not acceptable_content_type):
- if http_headers.has_key('content-type'):
- bozo_message = '%s is not an XML media type' % http_headers['content-type']
+ if http_headers.has_key('content-type') or http_headers.has_key('Content-type'):
+ bozo_message = '%s is not an XML media type' % http_headers.get('content-type', http_headers.get('Content-type'))
else:
bozo_message = 'no Content-type specified'
result['bozo'] = 1
@@ -3505,8 +3705,12 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
if data is not None:
result['version'], data, entities = _stripDoctype(data)
- baseuri = http_headers.get('content-location', result.get('href'))
- baselang = http_headers.get('content-language', None)
+ # ensure that baseuri is an absolute uri using an acceptable URI scheme
+ contentloc = http_headers.get('content-location', http_headers.get('Content-Location', ''))
+ href = result.get('href', '')
+ baseuri = _makeSafeAbsoluteURI(href, contentloc) or _makeSafeAbsoluteURI(contentloc) or href
+
+ baselang = http_headers.get('content-language', http_headers.get('Content-Language', None))
# if server sent 304, we're done
if result.get('status', 0) == 304:
@@ -3582,7 +3786,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
elif proposed_encoding != result['encoding']:
result['bozo'] = 1
result['bozo_exception'] = CharacterEncodingOverride( \
- 'documented declared as %s, but parsed as %s' % \
+ 'document declared as %s, but parsed as %s' % \
(result['encoding'], proposed_encoding))
result['encoding'] = proposed_encoding
@@ -3603,7 +3807,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
saxparser._ns_stack.append({'http://www.w3.org/XML/1998/namespace':'xml'})
try:
saxparser.parse(source)
- except Exception, e:
+ except Exception as e:
if _debug:
import traceback
traceback.print_stack()
@@ -3613,8 +3817,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['bozo_exception'] = feedparser.exc or e
use_strict_parser = 0
if not use_strict_parser:
- feedparser = _LooseFeedParser(baseuri, baselang, known_encoding and 'utf-8' or '', entities)
- feedparser.feed(data)
+ feedparser = _LooseFeedParser(baseuri, baselang, 'utf-8', entities)
+ feedparser.feed(data.decode('utf-8', 'replace'))
result['feed'] = feedparser.feeddata
result['entries'] = feedparser.entries
result['version'] = result['version'] or feedparser.version
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 6215132e4b..f2aeb4e4bd 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -14,7 +14,7 @@
from calibre import browser, __appname__, iswindows, \
- strftime, preferred_encoding
+ strftime, preferred_encoding, as_unicode
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre import entity_to_unicode
@@ -986,8 +986,8 @@ def _download_cover(self):
self.cover_path = None
try:
cu = self.get_cover_url()
- except Exception, err:
- self.log.error(_('Could not download cover: %s')%str(err))
+ except Exception as err:
+ self.log.error(_('Could not download cover: %s')%as_unicode(err))
self.log.debug(traceback.format_exc())
else:
if not cu:
@@ -1318,11 +1318,11 @@ def parse_feeds(self):
oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
get_article_url=self.get_article_url))
- except Exception, err:
+ except Exception as err:
feed = Feed()
msg = 'Failed feed: %s'%(title if title else url)
feed.populate_from_preparsed_feed(msg, [])
- feed.description = repr(err)
+ feed.description = as_unicode(err)
parsed_feeds.append(feed)
self.log.exception(msg)
@@ -1468,7 +1468,7 @@ def download(self):
'http://news.calibre-ebook.com/subscribed_files/%s/0/temp.downloaded_recipe'
% self.calibre_periodicals_slug
).read()
- except Exception, e:
+ except Exception as e:
if hasattr(e, 'getcode') and e.getcode() == 403:
raise DownloadDenied(
_('You do not have permission to download this issue.'
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index f2e22c8f5e..64a2c32fb3 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -210,7 +210,7 @@ def fetch_url(self, url):
with closing(open_func(url, timeout=self.timeout)) as f:
data = response(f.read()+f.read())
data.newurl = f.geturl()
- except urllib2.URLError, err:
+ except urllib2.URLError as err:
if hasattr(err, 'code') and responses.has_key(err.code):
raise FetchError, responses[err.code]
if getattr(err, 'reason', [0])[0] == 104 or \